Diffstat (limited to 'src/video_core')
-rw-r--r--  src/video_core/CMakeLists.txt | 28
-rw-r--r--  src/video_core/buffer_cache/buffer_block.h | 27
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 288
-rw-r--r--  src/video_core/compatible_formats.cpp | 155
-rw-r--r--  src/video_core/compatible_formats.h | 34
-rw-r--r--  src/video_core/engines/const_buffer_engine_interface.h | 1
-rw-r--r--  src/video_core/engines/fermi_2d.cpp | 22
-rw-r--r--  src/video_core/engines/fermi_2d.h | 13
-rw-r--r--  src/video_core/engines/kepler_compute.cpp | 22
-rw-r--r--  src/video_core/engines/kepler_compute.h | 18
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp | 44
-rw-r--r--  src/video_core/engines/maxwell_3d.h | 26
-rw-r--r--  src/video_core/engines/maxwell_dma.cpp | 297
-rw-r--r--  src/video_core/engines/maxwell_dma.h | 348
-rw-r--r--  src/video_core/engines/shader_bytecode.h | 8
-rw-r--r--  src/video_core/fence_manager.h | 32
-rw-r--r--  src/video_core/gpu.cpp | 40
-rw-r--r--  src/video_core/gpu.h | 119
-rw-r--r--  src/video_core/gpu_asynch.cpp | 17
-rw-r--r--  src/video_core/gpu_asynch.h | 6
-rw-r--r--  src/video_core/gpu_synch.cpp | 14
-rw-r--r--  src/video_core/gpu_synch.h | 8
-rw-r--r--  src/video_core/gpu_thread.cpp | 11
-rw-r--r--  src/video_core/host_shaders/CMakeLists.txt | 43
-rw-r--r--  src/video_core/host_shaders/StringShaderHeader.cmake | 11
-rw-r--r--  src/video_core/host_shaders/opengl_present.frag | 10
-rw-r--r--  src/video_core/host_shaders/opengl_present.vert | 24
-rw-r--r--  src/video_core/host_shaders/source_shader.h.in | 9
-rw-r--r--  src/video_core/macro/macro.cpp | 60
-rw-r--r--  src/video_core/macro/macro.h | 22
-rw-r--r--  src/video_core/macro/macro_hle.cpp | 109
-rw-r--r--  src/video_core/macro/macro_hle.h | 44
-rw-r--r--  src/video_core/macro/macro_interpreter.cpp | 4
-rw-r--r--  src/video_core/macro/macro_jit_x64.cpp | 142
-rw-r--r--  src/video_core/macro/macro_jit_x64.h | 10
-rw-r--r--  src/video_core/memory_manager.cpp | 568
-rw-r--r--  src/video_core/memory_manager.h | 201
-rw-r--r--  src/video_core/morton.cpp | 276
-rw-r--r--  src/video_core/query_cache.h | 49
-rw-r--r--  src/video_core/rasterizer_cache.cpp | 7
-rw-r--r--  src/video_core/rasterizer_cache.h | 253
-rw-r--r--  src/video_core/rasterizer_interface.h | 7
-rw-r--r--  src/video_core/renderer_base.cpp | 6
-rw-r--r--  src/video_core/renderer_base.h | 22
-rw-r--r--  src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 2102
-rw-r--r--  src/video_core/renderer_opengl/gl_arb_decompiler.h | 29
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.cpp | 81
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h | 53
-rw-r--r--  src/video_core/renderer_opengl/gl_device.cpp | 54
-rw-r--r--  src/video_core/renderer_opengl/gl_device.h | 25
-rw-r--r--  src/video_core/renderer_opengl/gl_fence_manager.cpp | 12
-rw-r--r--  src/video_core/renderer_opengl/gl_fence_manager.h | 7
-rw-r--r--  src/video_core/renderer_opengl/gl_query_cache.cpp | 13
-rw-r--r--  src/video_core/renderer_opengl/gl_query_cache.h | 14
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 562
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 61
-rw-r--r--  src/video_core/renderer_opengl/gl_resource_manager.cpp | 9
-rw-r--r--  src/video_core/renderer_opengl/gl_resource_manager.h | 9
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp | 307
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.h | 97
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 90
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 133
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_disk_cache.h | 27
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_manager.cpp | 71
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_manager.h | 17
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_util.cpp | 15
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_util.h | 2
-rw-r--r--  src/video_core/renderer_opengl/gl_state_tracker.cpp | 6
-rw-r--r--  src/video_core/renderer_opengl/gl_state_tracker.h | 28
-rw-r--r--  src/video_core/renderer_opengl/gl_stream_buffer.cpp | 64
-rw-r--r--  src/video_core/renderer_opengl/gl_stream_buffer.h | 25
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp | 227
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.h | 14
-rw-r--r--  src/video_core/renderer_opengl/maxwell_to_gl.h | 120
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.cpp | 390
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.h | 47
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 116
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.h | 207
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 329
-rw-r--r--  src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp | 14
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.cpp | 85
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.h | 25
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.cpp | 760
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.h | 25
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 157
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h | 49
-rw-r--r--  src/video_core/renderer_vulkan/vk_command_pool.cpp | 46
-rw-r--r--  src/video_core/renderer_vulkan/vk_command_pool.h | 34
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp | 217
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h | 4
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 136
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.cpp | 53
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.h | 15
-rw-r--r--  src/video_core/renderer_vulkan/vk_device.cpp | 322
-rw-r--r--  src/video_core/renderer_vulkan/vk_device.h | 19
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.cpp | 16
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.h | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 481
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 28
-rw-r--r--  src/video_core/renderer_vulkan/vk_image.cpp | 38
-rw-r--r--  src/video_core/renderer_vulkan/vk_master_semaphore.cpp | 56
-rw-r--r--  src/video_core/renderer_vulkan/vk_master_semaphore.h | 70
-rw-r--r--  src/video_core/renderer_vulkan/vk_memory_manager.cpp | 13
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 263
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h | 85
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.cpp | 71
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.h | 23
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 685
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h | 65
-rw-r--r--  src/video_core/renderer_vulkan/vk_renderpass_cache.cpp | 129
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_manager.cpp | 312
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_manager.h | 196
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_pool.cpp | 63
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_pool.h | 43
-rw-r--r--  src/video_core/renderer_vulkan/vk_sampler_cache.cpp | 54
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp | 124
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h | 70
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 152
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.h | 9
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_util.cpp | 14
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 74
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | 14
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.cpp | 76
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.h | 68
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.cpp | 47
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.h | 11
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.cpp | 116
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.h | 9
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp | 334
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h | 52
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 36
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.h | 32
-rw-r--r--  src/video_core/renderer_vulkan/wrapper.cpp | 170
-rw-r--r--  src/video_core/renderer_vulkan/wrapper.h | 134
-rw-r--r--  src/video_core/shader/ast.h | 25
-rw-r--r--  src/video_core/shader/async_shaders.cpp | 221
-rw-r--r--  src/video_core/shader/async_shaders.h | 147
-rw-r--r--  src/video_core/shader/control_flow.cpp | 57
-rw-r--r--  src/video_core/shader/decode/arithmetic_half.cpp | 3
-rw-r--r--  src/video_core/shader/decode/arithmetic_integer.cpp | 6
-rw-r--r--  src/video_core/shader/decode/arithmetic_integer_immediate.cpp | 35
-rw-r--r--  src/video_core/shader/decode/half_set.cpp | 88
-rw-r--r--  src/video_core/shader/decode/image.cpp | 73
-rw-r--r--  src/video_core/shader/decode/memory.cpp | 9
-rw-r--r--  src/video_core/shader/decode/other.cpp | 3
-rw-r--r--  src/video_core/shader/decode/texture.cpp | 57
-rw-r--r--  src/video_core/shader/decode/video.cpp | 19
-rw-r--r--  src/video_core/shader/decode/xmad.cpp | 15
-rw-r--r--  src/video_core/shader/memory_util.cpp | 11
-rw-r--r--  src/video_core/shader/memory_util.h | 6
-rw-r--r--  src/video_core/shader/node.h | 75
-rw-r--r--  src/video_core/shader/node_helper.h | 2
-rw-r--r--  src/video_core/shader/registry.cpp | 70
-rw-r--r--  src/video_core/shader/registry.h | 37
-rw-r--r--  src/video_core/shader/shader_ir.cpp | 4
-rw-r--r--  src/video_core/shader/shader_ir.h | 14
-rw-r--r--  src/video_core/shader/track.cpp | 82
-rw-r--r--  src/video_core/shader_cache.h | 240
-rw-r--r--  src/video_core/shader_notify.cpp | 42
-rw-r--r--  src/video_core/shader_notify.h | 29
-rw-r--r--  src/video_core/surface.cpp | 257
-rw-r--r--  src/video_core/surface.h | 770
-rw-r--r--  src/video_core/texture_cache/format_lookup_table.cpp | 149
-rw-r--r--  src/video_core/texture_cache/surface_base.cpp | 20
-rw-r--r--  src/video_core/texture_cache/surface_base.h | 13
-rw-r--r--  src/video_core/texture_cache/surface_params.cpp | 143
-rw-r--r--  src/video_core/texture_cache/surface_params.h | 7
-rw-r--r--  src/video_core/texture_cache/texture_cache.h | 199
-rw-r--r--  src/video_core/textures/convert.cpp | 6
-rw-r--r--  src/video_core/textures/decoders.cpp | 249
-rw-r--r--  src/video_core/textures/decoders.h | 47
-rw-r--r--  src/video_core/textures/texture.cpp | 2
-rw-r--r--  src/video_core/textures/texture.h | 49
-rw-r--r--  src/video_core/video_core.cpp | 41
174 files changed, 11140 insertions, 7202 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 2bf8d68ce..da9e9fdda 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,8 +1,12 @@
+add_subdirectory(host_shaders)
+
add_library(video_core STATIC
buffer_cache/buffer_block.h
buffer_cache/buffer_cache.h
buffer_cache/map_interval.cpp
buffer_cache/map_interval.h
+ compatible_formats.cpp
+ compatible_formats.h
dirty_flags.cpp
dirty_flags.h
dma_pusher.cpp
@@ -27,6 +31,8 @@ add_library(video_core STATIC
engines/shader_type.h
macro/macro.cpp
macro/macro.h
+ macro/macro_hle.cpp
+ macro/macro_hle.h
macro/macro_interpreter.cpp
macro/macro_interpreter.h
macro/macro_jit_x64.cpp
@@ -49,11 +55,11 @@ add_library(video_core STATIC
query_cache.h
rasterizer_accelerated.cpp
rasterizer_accelerated.h
- rasterizer_cache.cpp
- rasterizer_cache.h
rasterizer_interface.h
renderer_base.cpp
renderer_base.h
+ renderer_opengl/gl_arb_decompiler.cpp
+ renderer_opengl/gl_arb_decompiler.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_device.cpp
@@ -93,6 +99,9 @@ add_library(video_core STATIC
renderer_opengl/utils.h
sampler_cache.cpp
sampler_cache.h
+ shader_cache.h
+ shader_notify.cpp
+ shader_notify.h
shader/decode/arithmetic.cpp
shader/decode/arithmetic_immediate.cpp
shader/decode/bfe.cpp
@@ -123,6 +132,8 @@ add_library(video_core STATIC
shader/decode/other.cpp
shader/ast.cpp
shader/ast.h
+ shader/async_shaders.cpp
+ shader/async_shaders.h
shader/compiler_settings.cpp
shader/compiler_settings.h
shader/control_flow.cpp
@@ -179,6 +190,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_blit_screen.h
renderer_vulkan/vk_buffer_cache.cpp
renderer_vulkan/vk_buffer_cache.h
+ renderer_vulkan/vk_command_pool.cpp
+ renderer_vulkan/vk_command_pool.h
renderer_vulkan/vk_compute_pass.cpp
renderer_vulkan/vk_compute_pass.h
renderer_vulkan/vk_compute_pipeline.cpp
@@ -193,6 +206,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_image.cpp
renderer_vulkan/vk_image.h
+ renderer_vulkan/vk_master_semaphore.cpp
+ renderer_vulkan/vk_master_semaphore.h
renderer_vulkan/vk_memory_manager.cpp
renderer_vulkan/vk_memory_manager.h
renderer_vulkan/vk_pipeline_cache.cpp
@@ -203,8 +218,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_renderpass_cache.cpp
renderer_vulkan/vk_renderpass_cache.h
- renderer_vulkan/vk_resource_manager.cpp
- renderer_vulkan/vk_resource_manager.h
+ renderer_vulkan/vk_resource_pool.cpp
+ renderer_vulkan/vk_resource_pool.h
renderer_vulkan/vk_sampler_cache.cpp
renderer_vulkan/vk_sampler_cache.h
renderer_vulkan/vk_scheduler.cpp
@@ -235,6 +250,9 @@ create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PRIVATE glad xbyak)
+add_dependencies(video_core host_shaders)
+target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
+
if (ENABLE_VULKAN)
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
@@ -255,5 +273,5 @@ endif()
if (MSVC)
target_compile_options(video_core PRIVATE /we4267)
else()
- target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion)
+ target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion -Werror=switch)
endif()
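
The host_shaders additions above wire a new build step into video_core: StringShaderHeader.cmake and source_shader.h.in (see the diffstat) turn each GLSL source into a generated C++ header, and the generated include directory is exposed through HOST_SHADERS_INCLUDE. A minimal sketch of what such a generated header could plausibly look like; the HostShaders namespace and constant name here are assumptions for illustration, not the template's confirmed output:

    // Hypothetical output of StringShaderHeader.cmake for opengl_present.frag.
    // The real identifiers come from source_shader.h.in and are not shown in this diff.
    #pragma once

    #include <string_view>

    namespace HostShaders {

    // Shader source embedded verbatim at configure time.
    constexpr std::string_view OPENGL_PRESENT_FRAG = R"(#version 430 core
    layout (location = 0) out vec4 color;
    void main() { color = vec4(1.0); }
    )";

    } // namespace HostShaders
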
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index e35ee0b67..e64170e66 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,48 +15,47 @@ namespace VideoCommon {
class BufferBlock {
public:
- bool Overlaps(const VAddr start, const VAddr end) const {
+ bool Overlaps(VAddr start, VAddr end) const {
return (cpu_addr < end) && (cpu_addr_end > start);
}
- bool IsInside(const VAddr other_start, const VAddr other_end) const {
+ bool IsInside(VAddr other_start, VAddr other_end) const {
return cpu_addr <= other_start && other_end <= cpu_addr_end;
}
- std::size_t GetOffset(const VAddr in_addr) {
+ std::size_t Offset(VAddr in_addr) const {
return static_cast<std::size_t>(in_addr - cpu_addr);
}
- VAddr GetCpuAddr() const {
+ VAddr CpuAddr() const {
return cpu_addr;
}
- VAddr GetCpuAddrEnd() const {
+ VAddr CpuAddrEnd() const {
return cpu_addr_end;
}
- void SetCpuAddr(const VAddr new_addr) {
+ void SetCpuAddr(VAddr new_addr) {
cpu_addr = new_addr;
cpu_addr_end = new_addr + size;
}
- std::size_t GetSize() const {
+ std::size_t Size() const {
return size;
}
- void SetEpoch(u64 new_epoch) {
- epoch = new_epoch;
+ u64 Epoch() const {
+ return epoch;
}
- u64 GetEpoch() {
- return epoch;
+ void SetEpoch(u64 new_epoch) {
+ epoch = new_epoch;
}
protected:
- explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
- SetCpuAddr(cpu_addr);
+ explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
+ SetCpuAddr(cpu_addr_);
}
- ~BufferBlock() = default;
private:
VAddr cpu_addr{};
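
The renamed accessors keep BufferBlock's half-open [cpu_addr, cpu_addr_end) semantics. A standalone sketch of the same overlap test, with the end-exclusive edge case spelled out:

    #include <cstdint>

    using VAddr = std::uint64_t;

    // Half-open interval overlap, matching BufferBlock::Overlaps.
    constexpr bool Overlaps(VAddr block_start, VAddr block_end, VAddr start, VAddr end) {
        return (block_start < end) && (block_end > start);
    }

    static_assert(Overlaps(0x1000, 0x2000, 0x1fff, 0x3000));  // last byte is shared
    static_assert(!Overlaps(0x1000, 0x2000, 0x2000, 0x3000)); // the end is exclusive
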
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index b88fce2cd..e7edd733f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -30,66 +30,71 @@
namespace VideoCommon {
-template <typename OwnerBuffer, typename BufferType, typename StreamBuffer>
+template <typename Buffer, typename BufferType, typename StreamBuffer>
class BufferCache {
using IntervalSet = boost::icl::interval_set<VAddr>;
using IntervalType = typename IntervalSet::interval_type;
using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
+ static constexpr u64 WRITE_PAGE_BIT = 11;
+ static constexpr u64 BLOCK_PAGE_BITS = 21;
+ static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
+
public:
- using BufferInfo = std::pair<BufferType, u64>;
+ struct BufferInfo {
+ BufferType handle;
+ u64 offset;
+ u64 address;
+ };
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
bool is_written = false, bool use_fast_cbuf = false) {
std::lock_guard lock{mutex};
- const auto& memory_manager = system.GPU().MemoryManager();
- const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
- if (!cpu_addr_opt) {
- return {GetEmptyBuffer(size), 0};
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr) {
+ return GetEmptyBuffer(size);
}
- const VAddr cpu_addr = *cpu_addr_opt;
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
constexpr std::size_t max_stream_size = 0x800;
if (use_fast_cbuf || size < max_stream_size) {
- if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
- auto& memory_manager = system.GPU().MemoryManager();
- const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
+ if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) {
+ const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size);
if (use_fast_cbuf) {
u8* dest;
if (is_granular) {
- dest = memory_manager.GetPointer(gpu_addr);
+ dest = gpu_memory.GetPointer(gpu_addr);
} else {
staging_buffer.resize(size);
dest = staging_buffer.data();
- memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
+ gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
}
return ConstBufferUpload(dest, size);
}
if (is_granular) {
- u8* const host_ptr = memory_manager.GetPointer(gpu_addr);
+ u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
std::memcpy(dest, host_ptr, size);
});
} else {
- return StreamBufferUpload(
- size, alignment, [&memory_manager, gpu_addr, size](u8* dest) {
- memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
- });
+ return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) {
+ gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
+ });
}
}
}
- OwnerBuffer block = GetBlock(cpu_addr, size);
- MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
+ Buffer* const block = GetBlock(*cpu_addr, size);
+ MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size);
if (!map) {
- return {GetEmptyBuffer(size), 0};
+ return GetEmptyBuffer(size);
}
if (is_written) {
map->MarkAsModified(true, GetModifiedTicks());
- if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
+ if (Settings::IsGPULevelHigh() &&
+ Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
MarkForAsyncFlush(map);
}
if (!map->is_written) {
@@ -98,7 +103,7 @@ public:
}
}
- return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))};
+ return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()};
}
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
@@ -110,31 +115,37 @@ public:
});
}
- void Map(std::size_t max_size) {
+ /// Prepares the buffer cache for data uploading
+ /// @param max_size Maximum number of bytes that will be uploaded
+ /// @return True when a stream buffer invalidation was required, false otherwise
+ bool Map(std::size_t max_size) {
std::lock_guard lock{mutex};
+ bool invalidated;
std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
buffer_offset = buffer_offset_base;
+
+ return invalidated;
}
- /// Finishes the upload stream, returns true on bindings invalidation.
- bool Unmap() {
+ /// Finishes the upload stream
+ void Unmap() {
std::lock_guard lock{mutex};
-
stream_buffer->Unmap(buffer_offset - buffer_offset_base);
- return std::exchange(invalidated, false);
}
+ /// Function called at the end of each frame, intended for deferred operations
void TickFrame() {
++epoch;
+
while (!pending_destruction.empty()) {
// Delay at least 4 frames before destruction.
// This is due to triple buffering happening on some drivers.
static constexpr u64 epochs_to_destroy = 5;
- if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) {
+ if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
break;
}
- pending_destruction.pop_front();
+ pending_destruction.pop();
}
}
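
TickFrame above implements epoch-based deferred destruction: a retired block is stamped with the current epoch and only released once epochs_to_destroy frames have elapsed, so storage still referenced by in-flight (triple-buffered) frames is never freed early. A self-contained sketch of the pattern, assuming a generic Resource type:

    #include <cstdint>
    #include <memory>
    #include <queue>

    struct Resource {
        std::uint64_t epoch = 0; // frame counter value when the resource was retired
    };

    class DeferredDeleter {
    public:
        // Mirrors BufferCache::QueueDestruction
        void Queue(std::shared_ptr<Resource> resource) {
            resource->epoch = epoch;
            pending.push(std::move(resource));
        }

        // Mirrors BufferCache::TickFrame; call once per frame
        void TickFrame() {
            ++epoch;
            static constexpr std::uint64_t epochs_to_destroy = 5;
            while (!pending.empty() && pending.front()->epoch + epochs_to_destroy <= epoch) {
                pending.pop(); // dropping the shared_ptr releases the resource
            }
        }

    private:
        std::uint64_t epoch = 0;
        std::queue<std::shared_ptr<Resource>> pending;
    };
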
@@ -245,28 +256,18 @@ public:
committed_flushes.pop_front();
}
- virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
+ virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
protected:
- explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
- std::unique_ptr<StreamBuffer> stream_buffer)
- : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
- stream_buffer_handle{this->stream_buffer->GetHandle()} {}
+ explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+ std::unique_ptr<StreamBuffer> stream_buffer_)
+ : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
+ stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {}
~BufferCache() = default;
- virtual BufferType ToHandle(const OwnerBuffer& storage) = 0;
-
- virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
-
- virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
- const u8* data) = 0;
-
- virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
- u8* data) = 0;
-
- virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset,
- std::size_t dst_offset, std::size_t size) = 0;
+ virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
return {};
@@ -321,19 +322,17 @@ protected:
}
private:
- MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr,
- std::size_t size) {
+ MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
if (overlaps.empty()) {
- auto& memory_manager = system.GPU().MemoryManager();
const VAddr cpu_addr_end = cpu_addr + size;
- if (memory_manager.IsGranularRange(gpu_addr, size)) {
- u8* host_ptr = memory_manager.GetPointer(gpu_addr);
- UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
+ if (gpu_memory.IsGranularRange(gpu_addr, size)) {
+ u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
+ block->Upload(block->Offset(cpu_addr), size, host_ptr);
} else {
staging_buffer.resize(size);
- memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
- UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
+ gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+ block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
}
return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
}
@@ -369,15 +368,15 @@ private:
}
if (modified_inheritance) {
map->MarkAsModified(true, GetModifiedTicks());
- if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
+ if (Settings::IsGPULevelHigh() &&
+ Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
MarkForAsyncFlush(map);
}
}
return map;
}
- void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end,
- const VectorMapInterval& overlaps) {
+ void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) {
const IntervalType base_interval{start, end};
IntervalSet interval_set{};
interval_set.add(base_interval);
@@ -386,13 +385,13 @@ private:
interval_set.subtract(subtract);
}
for (auto& interval : interval_set) {
- std::size_t size = interval.upper() - interval.lower();
- if (size > 0) {
- staging_buffer.resize(size);
- system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
- UploadBlockData(block, block->GetOffset(interval.lower()), size,
- staging_buffer.data());
+ const std::size_t size = interval.upper() - interval.lower();
+ if (size == 0) {
+ continue;
}
+ staging_buffer.resize(size);
+ cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
+ block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
}
}
@@ -422,11 +421,15 @@ private:
}
void FlushMap(MapInterval* map) {
+ const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
+ ASSERT_OR_EXECUTE(it != blocks.end(), return;);
+
+ std::shared_ptr<Buffer> block = it->second;
+
const std::size_t size = map->end - map->start;
- OwnerBuffer block = blocks[map->start >> block_page_bits];
staging_buffer.resize(size);
- DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data());
- system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
+ block->Download(block->Offset(map->start), size, staging_buffer.data());
+ cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size);
map->MarkAsModified(false, 0);
}
@@ -438,7 +441,7 @@ private:
buffer_ptr += size;
buffer_offset += size;
- return {stream_buffer_handle, uploaded_offset};
+ return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
}
void AlignBuffer(std::size_t alignment) {
@@ -448,121 +451,112 @@ private:
buffer_offset = offset_aligned;
}
- OwnerBuffer EnlargeBlock(OwnerBuffer buffer) {
- const std::size_t old_size = buffer->GetSize();
- const std::size_t new_size = old_size + block_page_size;
- const VAddr cpu_addr = buffer->GetCpuAddr();
- OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size);
- CopyBlock(buffer, new_buffer, 0, 0, old_size);
- buffer->SetEpoch(epoch);
- pending_destruction.push_back(buffer);
+ std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
+ const std::size_t old_size = buffer->Size();
+ const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
+ const VAddr cpu_addr = buffer->CpuAddr();
+ std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
+ new_buffer->CopyFrom(*buffer, 0, 0, old_size);
+ QueueDestruction(std::move(buffer));
+
const VAddr cpu_addr_end = cpu_addr + new_size - 1;
- u64 page_start = cpu_addr >> block_page_bits;
- const u64 page_end = cpu_addr_end >> block_page_bits;
- while (page_start <= page_end) {
- blocks[page_start] = new_buffer;
- ++page_start;
+ const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
+ for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
+ blocks.insert_or_assign(page_start, new_buffer);
}
+
return new_buffer;
}
- OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) {
- const std::size_t size_1 = first->GetSize();
- const std::size_t size_2 = second->GetSize();
- const VAddr first_addr = first->GetCpuAddr();
- const VAddr second_addr = second->GetCpuAddr();
+ std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
+ std::shared_ptr<Buffer> second) {
+ const std::size_t size_1 = first->Size();
+ const std::size_t size_2 = second->Size();
+ const VAddr first_addr = first->CpuAddr();
+ const VAddr second_addr = second->CpuAddr();
const VAddr new_addr = std::min(first_addr, second_addr);
const std::size_t new_size = size_1 + size_2;
- OwnerBuffer new_buffer = CreateBlock(new_addr, new_size);
- CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
- CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
- first->SetEpoch(epoch);
- second->SetEpoch(epoch);
- pending_destruction.push_back(first);
- pending_destruction.push_back(second);
+
+ std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
+ new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
+ new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
+ QueueDestruction(std::move(first));
+ QueueDestruction(std::move(second));
+
const VAddr cpu_addr_end = new_addr + new_size - 1;
- u64 page_start = new_addr >> block_page_bits;
- const u64 page_end = cpu_addr_end >> block_page_bits;
- while (page_start <= page_end) {
- blocks[page_start] = new_buffer;
- ++page_start;
+ const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
+ for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
+ blocks.insert_or_assign(page_start, new_buffer);
}
return new_buffer;
}
- OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
- OwnerBuffer found;
+ Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
+ std::shared_ptr<Buffer> found;
+
const VAddr cpu_addr_end = cpu_addr + size - 1;
- u64 page_start = cpu_addr >> block_page_bits;
- const u64 page_end = cpu_addr_end >> block_page_bits;
- while (page_start <= page_end) {
+ const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
+ for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
auto it = blocks.find(page_start);
if (it == blocks.end()) {
if (found) {
found = EnlargeBlock(found);
- } else {
- const VAddr start_addr = (page_start << block_page_bits);
- found = CreateBlock(start_addr, block_page_size);
- blocks[page_start] = found;
- }
- } else {
- if (found) {
- if (found == it->second) {
- ++page_start;
- continue;
- }
- found = MergeBlocks(found, it->second);
- } else {
- found = it->second;
+ continue;
}
+ const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
+ found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
+ blocks.insert_or_assign(page_start, found);
+ continue;
+ }
+ if (!found) {
+ found = it->second;
+ continue;
+ }
+ if (found != it->second) {
+ found = MergeBlocks(std::move(found), it->second);
}
- ++page_start;
}
- return found;
+ return found.get();
}
- void MarkRegionAsWritten(const VAddr start, const VAddr end) {
- u64 page_start = start >> write_page_bit;
- const u64 page_end = end >> write_page_bit;
- while (page_start <= page_end) {
- auto it = written_pages.find(page_start);
- if (it != written_pages.end()) {
- it->second = it->second + 1;
- } else {
- written_pages[page_start] = 1;
+ void MarkRegionAsWritten(VAddr start, VAddr end) {
+ const u64 page_end = end >> WRITE_PAGE_BIT;
+ for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
+ if (const auto [it, inserted] = written_pages.emplace(page_start, 1); !inserted) {
+ ++it->second;
}
- ++page_start;
}
}
- void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
- u64 page_start = start >> write_page_bit;
- const u64 page_end = end >> write_page_bit;
- while (page_start <= page_end) {
+ void UnmarkRegionAsWritten(VAddr start, VAddr end) {
+ const u64 page_end = end >> WRITE_PAGE_BIT;
+ for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
auto it = written_pages.find(page_start);
if (it != written_pages.end()) {
if (it->second > 1) {
- it->second = it->second - 1;
+ --it->second;
} else {
written_pages.erase(it);
}
}
- ++page_start;
}
}
- bool IsRegionWritten(const VAddr start, const VAddr end) const {
- u64 page_start = start >> write_page_bit;
- const u64 page_end = end >> write_page_bit;
- while (page_start <= page_end) {
+ bool IsRegionWritten(VAddr start, VAddr end) const {
+ const u64 page_end = end >> WRITE_PAGE_BIT;
+ for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
if (written_pages.count(page_start) > 0) {
return true;
}
- ++page_start;
}
return false;
}
+ void QueueDestruction(std::shared_ptr<Buffer> buffer) {
+ buffer->SetEpoch(epoch);
+ pending_destruction.push(std::move(buffer));
+ }
+
void MarkForAsyncFlush(MapInterval* map) {
if (!uncommitted_flushes) {
uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
@@ -571,12 +565,11 @@ private:
}
VideoCore::RasterizerInterface& rasterizer;
- Core::System& system;
+ Tegra::MemoryManager& gpu_memory;
+ Core::Memory::Memory& cpu_memory;
std::unique_ptr<StreamBuffer> stream_buffer;
- BufferType stream_buffer_handle{};
-
- bool invalidated = false;
+ BufferType stream_buffer_handle;
u8* buffer_ptr = nullptr;
u64 buffer_offset = 0;
@@ -586,18 +579,15 @@ private:
boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
mapped_addresses;
- static constexpr u64 write_page_bit = 11;
std::unordered_map<u64, u32> written_pages;
+ std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
- static constexpr u64 block_page_bits = 21;
- static constexpr u64 block_page_size = 1ULL << block_page_bits;
- std::unordered_map<u64, OwnerBuffer> blocks;
-
- std::list<OwnerBuffer> pending_destruction;
+ std::queue<std::shared_ptr<Buffer>> pending_destruction;
u64 epoch = 0;
u64 modified_ticks = 0;
std::vector<u8> staging_buffer;
+
std::list<MapInterval*> marked_for_unregister;
std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
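
The written-region tracking above reference-counts 2 KiB pages (WRITE_PAGE_BIT = 11): marking increments a per-page counter, unmarking decrements it and erases the entry at zero, so IsRegionWritten becomes a few hash lookups instead of an interval query. The same bookkeeping as a self-contained sketch:

    #include <cstdint>
    #include <unordered_map>

    using VAddr = std::uint64_t;

    // Reference-counted "written" flags over 2 KiB pages, as in BufferCache.
    class WrittenPages {
        static constexpr std::uint64_t PAGE_BIT = 11;

    public:
        void Mark(VAddr start, VAddr end) {
            for (VAddr page = start >> PAGE_BIT; page <= (end >> PAGE_BIT); ++page) {
                ++pages[page]; // operator[] default-initializes new counts to zero
            }
        }

        void Unmark(VAddr start, VAddr end) {
            for (VAddr page = start >> PAGE_BIT; page <= (end >> PAGE_BIT); ++page) {
                const auto it = pages.find(page);
                if (it != pages.end() && --it->second == 0) {
                    pages.erase(it);
                }
            }
        }

        bool IsWritten(VAddr start, VAddr end) const {
            for (VAddr page = start >> PAGE_BIT; page <= (end >> PAGE_BIT); ++page) {
                if (pages.count(page) > 0) {
                    return true;
                }
            }
            return false;
        }

    private:
        std::unordered_map<std::uint64_t, std::uint32_t> pages;
    };
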
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
new file mode 100644
index 000000000..b06c32c84
--- /dev/null
+++ b/src/video_core/compatible_formats.cpp
@@ -0,0 +1,155 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bitset>
+#include <cstddef>
+
+#include "video_core/compatible_formats.h"
+#include "video_core/surface.h"
+
+namespace VideoCore::Surface {
+
+namespace {
+
+// Compatibility table taken from Table 3.X.2 in:
+// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt
+
+constexpr std::array VIEW_CLASS_128_BITS = {
+ PixelFormat::R32G32B32A32_FLOAT,
+ PixelFormat::R32G32B32A32_UINT,
+ PixelFormat::R32G32B32A32_SINT,
+};
+
+constexpr std::array VIEW_CLASS_96_BITS = {
+ PixelFormat::R32G32B32_FLOAT,
+};
+// Missing formats:
+// PixelFormat::RGB32UI,
+// PixelFormat::RGB32I,
+
+constexpr std::array VIEW_CLASS_64_BITS = {
+ PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT,
+ PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT,
+ PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
+ PixelFormat::R16G16B16A16_UINT, PixelFormat::R16G16B16A16_SINT,
+};
+
+// TODO: How should we handle 48 bits?
+
+constexpr std::array VIEW_CLASS_32_BITS = {
+ PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT,
+ PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
+ PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
+ PixelFormat::R16G16_UNORM, PixelFormat::A8B8G8R8_SNORM, PixelFormat::R16G16_SNORM,
+ PixelFormat::A8B8G8R8_SRGB, PixelFormat::E5B9G9R9_FLOAT, PixelFormat::B8G8R8A8_UNORM,
+ PixelFormat::B8G8R8A8_SRGB, PixelFormat::A8B8G8R8_UINT, PixelFormat::A8B8G8R8_SINT,
+ PixelFormat::A2B10G10R10_UINT,
+};
+
+// TODO: How should we handle 24 bits?
+
+constexpr std::array VIEW_CLASS_16_BITS = {
+ PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT,
+ PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM,
+ PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT,
+};
+
+constexpr std::array VIEW_CLASS_8_BITS = {
+ PixelFormat::R8_UINT,
+ PixelFormat::R8_UNORM,
+ PixelFormat::R8_SINT,
+ PixelFormat::R8_SNORM,
+};
+
+constexpr std::array VIEW_CLASS_RGTC1_RED = {
+ PixelFormat::BC4_UNORM,
+ PixelFormat::BC4_SNORM,
+};
+
+constexpr std::array VIEW_CLASS_RGTC2_RG = {
+ PixelFormat::BC5_UNORM,
+ PixelFormat::BC5_SNORM,
+};
+
+constexpr std::array VIEW_CLASS_BPTC_UNORM = {
+ PixelFormat::BC7_UNORM,
+ PixelFormat::BC7_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_BPTC_FLOAT = {
+ PixelFormat::BC6H_SFLOAT,
+ PixelFormat::BC6H_UFLOAT,
+};
+
+// Compatibility table taken from Table 4.X.1 in:
+// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt
+
+constexpr std::array COPY_CLASS_128_BITS = {
+ PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT,
+ PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM,
+ PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM,
+ PixelFormat::BC7_UNORM, PixelFormat::BC7_SRGB, PixelFormat::BC6H_SFLOAT,
+ PixelFormat::BC6H_UFLOAT,
+};
+// Missing formats:
+// PixelFormat::RGBA32I
+// COMPRESSED_RG_RGTC2
+
+constexpr std::array COPY_CLASS_64_BITS = {
+ PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
+ PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
+ PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT,
+ PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_SINT,
+ PixelFormat::BC1_RGBA_UNORM, PixelFormat::BC1_RGBA_SRGB,
+};
+// Missing formats:
+// COMPRESSED_RGB_S3TC_DXT1_EXT
+// COMPRESSED_SRGB_S3TC_DXT1_EXT
+// COMPRESSED_RGBA_S3TC_DXT1_EXT
+// COMPRESSED_SIGNED_RED_RGTC1
+
+void Enable(FormatCompatibility::Table& compatibility, size_t format_a, size_t format_b) {
+ compatibility[format_a][format_b] = true;
+ compatibility[format_b][format_a] = true;
+}
+
+void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) {
+ Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
+}
+
+template <typename Range>
+void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) {
+ for (auto it_a = range.begin(); it_a != range.end(); ++it_a) {
+ for (auto it_b = it_a; it_b != range.end(); ++it_b) {
+ Enable(compatibility, *it_a, *it_b);
+ }
+ }
+}
+
+} // Anonymous namespace
+
+FormatCompatibility::FormatCompatibility() {
+ for (size_t i = 0; i < MaxPixelFormat; ++i) {
+ // Identity is allowed
+ Enable(view, i, i);
+ }
+
+ EnableRange(view, VIEW_CLASS_128_BITS);
+ EnableRange(view, VIEW_CLASS_96_BITS);
+ EnableRange(view, VIEW_CLASS_64_BITS);
+ EnableRange(view, VIEW_CLASS_32_BITS);
+ EnableRange(view, VIEW_CLASS_16_BITS);
+ EnableRange(view, VIEW_CLASS_8_BITS);
+ EnableRange(view, VIEW_CLASS_RGTC1_RED);
+ EnableRange(view, VIEW_CLASS_RGTC2_RG);
+ EnableRange(view, VIEW_CLASS_BPTC_UNORM);
+ EnableRange(view, VIEW_CLASS_BPTC_FLOAT);
+
+ copy = view;
+ EnableRange(copy, COPY_CLASS_128_BITS);
+ EnableRange(copy, COPY_CLASS_64_BITS);
+}
+
+} // namespace VideoCore::Surface
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
new file mode 100644
index 000000000..51766349b
--- /dev/null
+++ b/src/video_core/compatible_formats.h
@@ -0,0 +1,34 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <bitset>
+#include <cstddef>
+
+#include "video_core/surface.h"
+
+namespace VideoCore::Surface {
+
+class FormatCompatibility {
+public:
+ using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>;
+
+ explicit FormatCompatibility();
+
+ bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept {
+ return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
+ }
+
+ bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept {
+ return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
+ }
+
+private:
+ Table view;
+ Table copy;
+};
+
+} // namespace VideoCore::Surface
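
Both tables are queried per pixel-format pair. A short usage sketch; the pairs below come straight from VIEW_CLASS_32_BITS and COPY_CLASS_64_BITS in the tables above:

    #include "video_core/compatible_formats.h"

    using VideoCore::Surface::FormatCompatibility;
    using VideoCore::Surface::PixelFormat;

    bool Example() {
        const FormatCompatibility compatibility;

        // Same 32-bit view class, so reinterpreting one as the other is legal:
        const bool ok_view =
            compatibility.TestView(PixelFormat::A8B8G8R8_UNORM, PixelFormat::R32_FLOAT);

        // Copy compatibility is a superset of view compatibility; these two share
        // the 64-bit copy class even though they are not view compatible:
        const bool ok_copy =
            compatibility.TestCopy(PixelFormat::R32G32_FLOAT, PixelFormat::BC1_RGBA_UNORM);

        return ok_view && ok_copy; // both true given the tables above
    }
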
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index ebe139504..f46e81bb7 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -93,6 +93,7 @@ public:
virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const = 0;
+ virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
virtual u32 GetBoundBuffer() const = 0;
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index ff10ff40d..9409c4075 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,7 +10,13 @@
namespace Tegra::Engines {
-Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
+Fermi2D::Fermi2D() = default;
+
+Fermi2D::~Fermi2D() = default;
+
+void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+ rasterizer = &rasterizer_;
+}
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
@@ -81,13 +87,13 @@ void Fermi2D::HandleSurfaceCopy() {
const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
dst_blit_y2};
- Config copy_config;
- copy_config.operation = regs.operation;
- copy_config.filter = regs.blit_control.filter;
- copy_config.src_rect = src_rect;
- copy_config.dst_rect = dst_rect;
-
- if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
+ const Config copy_config{
+ .operation = regs.operation,
+ .filter = regs.blit_control.filter,
+ .src_rect = src_rect,
+ .dst_rect = dst_rect,
+ };
+ if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
UNIMPLEMENTED();
}
}
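
Fermi2D, and below it KeplerCompute and Maxwell3D, now receive the rasterizer through a separate BindRasterizer call instead of the constructor, so the engines can be constructed before the rasterizer that depends on them exists. A skeleton of the two-phase pattern, with placeholder types standing in for the real interfaces:

    class RasterizerInterface {};

    class Engine {
    public:
        void BindRasterizer(RasterizerInterface& rasterizer_) {
            rasterizer = &rasterizer_;
        }

    protected:
        // A pointer rather than a reference: the engine may exist before any
        // rasterizer does, but it must be bound before the first use.
        RasterizerInterface* rasterizer = nullptr;
    };

    class Rasterizer : public RasterizerInterface {
    public:
        explicit Rasterizer(Engine& engine_) : engine{engine_} {}

    private:
        Engine& engine;
    };

    void Example() {
        Engine engine;                     // 1. engines come first
        Rasterizer rasterizer{engine};     // 2. the rasterizer can reference them
        engine.BindRasterizer(rasterizer); // 3. then the back-reference is bound
    }
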
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 8f37d053f..0909709ec 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -34,8 +34,11 @@ namespace Tegra::Engines {
class Fermi2D final : public EngineInterface {
public:
- explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer);
- ~Fermi2D() = default;
+ explicit Fermi2D();
+ ~Fermi2D();
+
+ /// Binds a rasterizer to this engine.
+ void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
/// Write the value to the register identified by method.
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
@@ -142,14 +145,14 @@ public:
} regs{};
struct Config {
- Operation operation;
- Filter filter;
+ Operation operation{};
+ Filter filter{};
Common::Rectangle<u32> src_rect;
Common::Rectangle<u32> dst_rect;
};
private:
- VideoCore::RasterizerInterface& rasterizer;
+ VideoCore::RasterizerInterface* rasterizer;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index f6237fc6a..898370739 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -16,14 +16,15 @@
namespace Tegra::Engines {
-KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager)
- : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
- memory_manager,
- regs.upload} {}
+KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
+ : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
KeplerCompute::~KeplerCompute() = default;
+void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+ rasterizer = &rasterizer_;
+}
+
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid KeplerCompute register, increase the size of the Regs structure");
@@ -92,8 +93,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
ASSERT(stage == ShaderType::Compute);
const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
+ return AccessSampler(memory_manager.Read<u32>(tex_info_address));
+}
- const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
+SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
+ const Texture::TextureHandle tex_handle{handle};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
@@ -101,11 +105,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
}
VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
- return rasterizer.AccessGuestDriverProfile();
+ return rasterizer->AccessGuestDriverProfile();
}
const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
- return rasterizer.AccessGuestDriverProfile();
+ return rasterizer->AccessGuestDriverProfile();
}
void KeplerCompute::ProcessLaunch() {
@@ -116,7 +120,7 @@ void KeplerCompute::ProcessLaunch() {
const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
- rasterizer.DispatchCompute(code_addr);
+ rasterizer->DispatchCompute(code_addr);
}
Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 18ceedfaf..7f2500aab 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -42,10 +42,12 @@ namespace Tegra::Engines {
class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface {
public:
- explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager);
+ explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
~KeplerCompute();
+ /// Binds a rasterizer to this engine.
+ void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+
static constexpr std::size_t NumConstBuffers = 8;
struct Regs {
@@ -219,6 +221,8 @@ public:
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const override;
+ SamplerDescriptor AccessSampler(u32 handle) const override;
+
u32 GetBoundBuffer() const override {
return regs.tex_cb_index;
}
@@ -228,11 +232,6 @@ public:
const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
private:
- Core::System& system;
- VideoCore::RasterizerInterface& rasterizer;
- MemoryManager& memory_manager;
- Upload::State upload_state;
-
void ProcessLaunch();
/// Retrieves information about a specific TIC entry from the TIC buffer.
@@ -240,6 +239,11 @@ private:
/// Retrieves information about a specific TSC entry from the TSC buffer.
Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
+
+ Core::System& system;
+ MemoryManager& memory_manager;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
+ Upload::State upload_state;
};
#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index e46b153f9..57ebc785f 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,14 +22,19 @@ using VideoCore::QueryType;
/// First register id that is actually a Macro call.
constexpr u32 MacroRegistersStart = 0xE00;
-Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager)
- : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
- macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} {
+Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
+ : system{system_}, memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)},
+ upload_state{memory_manager, regs.upload} {
dirty.flags.flip();
InitializeRegisterDefaults();
}
+Maxwell3D::~Maxwell3D() = default;
+
+void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+ rasterizer = &rasterizer_;
+}
+
void Maxwell3D::InitializeRegisterDefaults() {
// Initializes registers to their default values - what games expect them to be at boot. This is
// for certain registers that may not be explicitly set by games.
@@ -128,7 +133,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
// Execute the current macro.
- macro_engine->Execute(macro_positions[entry], parameters);
+ macro_engine->Execute(*this, macro_positions[entry], parameters);
if (mme_draw.current_mode != MMEDrawMode::Undefined) {
FlushMMEInlineDraw();
}
@@ -192,7 +197,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
switch (method) {
case MAXWELL3D_REG_INDEX(wait_for_idle): {
- rasterizer.WaitForIdle();
+ rasterizer->WaitForIdle();
break;
}
case MAXWELL3D_REG_INDEX(shadow_ram_control): {
@@ -402,7 +407,7 @@ void Maxwell3D::FlushMMEInlineDraw() {
const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
if (ShouldExecute()) {
- rasterizer.Draw(is_indexed, true);
+ rasterizer->Draw(is_indexed, true);
}
// TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
@@ -465,7 +470,7 @@ void Maxwell3D::ProcessQueryGet() {
switch (regs.query.query_get.operation) {
case Regs::QueryOperation::Release:
if (regs.query.query_get.fence == 1) {
- rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
+ rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
} else {
StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
}
@@ -533,7 +538,7 @@ void Maxwell3D::ProcessQueryCondition() {
void Maxwell3D::ProcessCounterReset() {
switch (regs.counter_reset) {
case Regs::CounterReset::SampleCnt:
- rasterizer.ResetCounter(QueryType::SamplesPassed);
+ rasterizer->ResetCounter(QueryType::SamplesPassed);
break;
default:
LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}",
@@ -547,7 +552,7 @@ void Maxwell3D::ProcessSyncPoint() {
const u32 increment = regs.sync_info.increment.Value();
[[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
if (increment) {
- rasterizer.SignalSyncPoint(sync_point);
+ rasterizer->SignalSyncPoint(sync_point);
}
}
@@ -570,7 +575,7 @@ void Maxwell3D::DrawArrays() {
const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
if (ShouldExecute()) {
- rasterizer.Draw(is_indexed, false);
+ rasterizer->Draw(is_indexed, false);
}
// TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
@@ -590,9 +595,9 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
return 0;
case Regs::QuerySelect::SamplesPassed:
// Deferred.
- rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
- system.GPU().GetTicks());
- return {};
+ rasterizer->Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
+ system.GPU().GetTicks());
+ return std::nullopt;
default:
LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
static_cast<u32>(regs.query.query_get.select.Value()));
@@ -718,7 +723,7 @@ void Maxwell3D::ProcessClearBuffers() {
regs.clear_buffers.R == regs.clear_buffers.B &&
regs.clear_buffers.R == regs.clear_buffers.A);
- rasterizer.Clear();
+ rasterizer->Clear();
}
u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
@@ -740,8 +745,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
const auto& tex_info_buffer = shader.const_buffers[const_buffer];
const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
+ return AccessSampler(memory_manager.Read<u32>(tex_info_address));
+}
- const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
+SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
+ const Texture::TextureHandle tex_handle{handle};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
@@ -749,11 +757,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
}
VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
- return rasterizer.AccessGuestDriverProfile();
+ return rasterizer->AccessGuestDriverProfile();
}
const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
- return rasterizer.AccessGuestDriverProfile();
+ return rasterizer->AccessGuestDriverProfile();
}
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index b827b112f..bc289c55d 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -51,9 +51,11 @@ namespace Tegra::Engines {
class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
public:
- explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager);
- ~Maxwell3D() = default;
+ explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager);
+ ~Maxwell3D();
+
+ /// Binds a rasterizer to this engine.
+ void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
/// Register structure of the Maxwell3D engine.
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
@@ -598,6 +600,7 @@ public:
BitField<4, 3, u32> block_height;
BitField<8, 3, u32> block_depth;
BitField<12, 1, InvMemoryLayout> type;
+ BitField<16, 1, u32> is_3d;
} memory_layout;
union {
BitField<0, 16, u32> layers;
@@ -646,7 +649,7 @@ public:
GetX() + GetWidth(), // right
GetY() // bottom
};
- };
+ }
f32 GetX() const {
return std::max(0.0f, translate_x - std::fabs(scale_x));
@@ -1403,6 +1406,8 @@ public:
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const override;
+ SamplerDescriptor AccessSampler(u32 handle) const override;
+
u32 GetBoundBuffer() const override {
return regs.tex_cb_index;
}
@@ -1415,6 +1420,14 @@ public:
return execute_on;
}
+ VideoCore::RasterizerInterface& Rasterizer() {
+ return *rasterizer;
+ }
+
+ const VideoCore::RasterizerInterface& Rasterizer() const {
+ return *rasterizer;
+ }
+
/// Notify a memory write has happened.
void OnMemoryWrite() {
dirty.flags |= dirty.on_write_stores;
@@ -1449,11 +1462,10 @@ private:
void InitializeRegisterDefaults();
Core::System& system;
-
- VideoCore::RasterizerInterface& rasterizer;
-
MemoryManager& memory_manager;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
+
/// Start offsets of each macro in macro_memory
std::array<u32, 0x80> macro_positions = {};
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 01d7df405..e88290754 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -14,50 +14,45 @@
namespace Tegra::Engines {
+using namespace Texture;
+
MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager)
: system{system}, memory_manager{memory_manager} {}
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
- ASSERT_MSG(method < Regs::NUM_REGS,
- "Invalid MaxwellDMA register, increase the size of the Regs structure");
+ ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
regs.reg_array[method] = method_argument;
-#define MAXWELLDMA_REG_INDEX(field_name) \
- (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32))
-
- switch (method) {
- case MAXWELLDMA_REG_INDEX(exec): {
- HandleCopy();
- break;
- }
+ if (method == offsetof(Regs, launch_dma) / sizeof(u32)) {
+ Launch();
}
-
-#undef MAXWELLDMA_REG_INDEX
}
void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) {
- for (std::size_t i = 0; i < amount; i++) {
+ for (size_t i = 0; i < amount; ++i) {
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
}
}
-void MaxwellDMA::HandleCopy() {
- LOG_TRACE(HW_GPU, "Requested a DMA copy");
-
- const GPUVAddr source = regs.src_address.Address();
- const GPUVAddr dest = regs.dst_address.Address();
+void MaxwellDMA::Launch() {
+ LOG_TRACE(Render_OpenGL, "DMA copy 0x{:x} -> 0x{:x}", static_cast<GPUVAddr>(regs.offset_in),
+ static_cast<GPUVAddr>(regs.offset_out));
// TODO(Subv): Perform more research and implement all features of this engine.
- ASSERT(regs.exec.enable_swizzle == 0);
- ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
- ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
- ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
- ASSERT(regs.dst_params.pos_x == 0);
- ASSERT(regs.dst_params.pos_y == 0);
-
- if (!regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
+ const LaunchDMA& launch = regs.launch_dma;
+ ASSERT(launch.remap_enable == 0);
+ ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
+ ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
+ ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
+ ASSERT(regs.dst_params.origin.x == 0);
+ ASSERT(regs.dst_params.origin.y == 0);
+
+ const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
+ const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
+
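+    // Of the four source/destination layout combinations, block->block is unimplemented
+    // below and the remaining three dispatch to the specialized copy routines.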
+ if (!is_src_pitch && !is_dst_pitch) {
// If both the source and the destination are in block layout, assert.
UNREACHABLE_MSG("Tiled->Tiled DMA transfers are not yet implemented");
return;
@@ -66,144 +61,156 @@ void MaxwellDMA::HandleCopy() {
// All copies here update the main memory, so mark all rasterizer states as invalid.
system.GPU().Maxwell3D().OnMemoryWrite();
- if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
- // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
- // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
- // y_count).
- if (!regs.exec.enable_2d) {
- memory_manager.CopyBlock(dest, source, regs.x_count);
- return;
- }
+ if (is_src_pitch && is_dst_pitch) {
+ CopyPitchToPitch();
+ } else {
+ ASSERT(launch.multi_line_enable == 1);
- // If both the source and the destination are in linear layout, perform a line-by-line
- // copy. We're going to take a subrect of size (x_count, y_count) from the source
- // rectangle. There is no need to manually flush/invalidate the regions because
- // CopyBlock does that for us.
- for (u32 line = 0; line < regs.y_count; ++line) {
- const GPUVAddr source_line = source + line * regs.src_pitch;
- const GPUVAddr dest_line = dest + line * regs.dst_pitch;
- memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
+ if (!is_src_pitch && is_dst_pitch) {
+ CopyBlockLinearToPitch();
+ } else {
+ CopyPitchToBlockLinear();
}
- return;
}
+}
- ASSERT(regs.exec.enable_2d == 1);
-
- if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
-
- ASSERT(regs.src_params.BlockDepth() == 0);
- // Optimized path for micro copies.
- if (regs.dst_pitch * regs.y_count < Texture::GetGOBSize() && regs.dst_pitch <= 64) {
- const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
- const std::size_t src_size = Texture::GetGOBSize();
- const std::size_t dst_size = regs.dst_pitch * regs.y_count;
- u32 pos_x = regs.src_params.pos_x;
- u32 pos_y = regs.src_params.pos_y;
- const u64 offset =
- Texture::GetGOBOffset(regs.src_params.size_x, regs.src_params.size_y, pos_x, pos_y,
- regs.src_params.BlockDepth(), bytes_per_pixel);
- const u32 x_in_gob = 64 / bytes_per_pixel;
- pos_x = pos_x % x_in_gob;
- pos_y = pos_y % 8;
-
- if (read_buffer.size() < src_size) {
- read_buffer.resize(src_size);
- }
-
- if (write_buffer.size() < dst_size) {
- write_buffer.resize(dst_size);
- }
-
- if (Settings::IsGPULevelExtreme()) {
- memory_manager.ReadBlock(source + offset, read_buffer.data(), src_size);
- memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
- } else {
- memory_manager.ReadBlockUnsafe(source + offset, read_buffer.data(), src_size);
- memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
- }
-
- Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
- regs.src_params.size_x, bytes_per_pixel, read_buffer.data(),
- write_buffer.data(), regs.src_params.BlockHeight(), pos_x,
- pos_y);
-
- memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
-
- return;
- }
- // If the input is tiled and the output is linear, deswizzle the input and copy it over.
- const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
- const std::size_t src_size = Texture::CalculateSize(
- true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
- regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
-
- const std::size_t src_layer_size = Texture::CalculateSize(
- true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1,
- regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
+void MaxwellDMA::CopyPitchToPitch() {
+    // When the `multi_line_enable` bit is disabled, the copy is performed as if we were
+    // copying a 1D buffer of length `line_length_in`.
+    // Otherwise, we copy a 2D image of dimensions (line_length_in, line_count).
+ if (!regs.launch_dma.multi_line_enable) {
+ memory_manager.CopyBlock(regs.offset_out, regs.offset_in, regs.line_length_in);
+ return;
+ }
- const std::size_t dst_size = regs.dst_pitch * regs.y_count;
+ // Perform a line-by-line copy.
+ // We're going to take a subrect of size (line_length_in, line_count) from the source rectangle.
+ // There is no need to manually flush/invalidate the regions because CopyBlock does that for us.
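+    // Illustration: with pitch_in = 0x100, pitch_out = 0x80, line_length_in = 0x40 and
+    // line_count = 2, line 0 copies 0x40 bytes at offset 0x0 of both surfaces and line 1
+    // copies from source offset 0x100 to destination offset 0x80.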
+ for (u32 line = 0; line < regs.line_count; ++line) {
+ const GPUVAddr source_line = regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
+ const GPUVAddr dest_line = regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
+ memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
+ }
+}
- if (read_buffer.size() < src_size) {
- read_buffer.resize(src_size);
- }
+void MaxwellDMA::CopyBlockLinearToPitch() {
+ UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
+ UNIMPLEMENTED_IF(regs.src_params.layer != 0);
- if (write_buffer.size() < dst_size) {
- write_buffer.resize(dst_size);
- }
+ // Optimized path for micro copies.
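+    // A GOB (group of bytes) is 512 bytes arranged as 64 bytes x 8 rows, so a destination
+    // this small can be unswizzled from a single GOB read.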
+ const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
+ if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) {
+ FastCopyBlockLinearToPitch();
+ return;
+ }
- if (Settings::IsGPULevelExtreme()) {
- memory_manager.ReadBlock(source, read_buffer.data(), src_size);
- memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
- } else {
- memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
- memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
- }
+ // Deswizzle the input and copy it over.
+ const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in;
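+    // The engine carries no explicit texel format; the texel size is inferred from the
+    // ratio of the destination pitch to the copied line length.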
+ const Parameters& src_params = regs.src_params;
+ const u32 width = src_params.width;
+ const u32 height = src_params.height;
+ const u32 depth = src_params.depth;
+ const u32 block_height = src_params.block_size.height;
+ const u32 block_depth = src_params.block_size.depth;
+ const size_t src_size =
+ CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
+ const size_t src_layer_size =
+ CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth);
+
+ if (read_buffer.size() < src_size) {
+ read_buffer.resize(src_size);
+ }
+ if (write_buffer.size() < dst_size) {
+ write_buffer.resize(dst_size);
+ }
- Texture::UnswizzleSubrect(
- regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
- read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(),
- regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y);
+ memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
+ memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
- memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
- } else {
- ASSERT(regs.dst_params.BlockDepth() == 0);
+ UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel,
+ block_height, src_params.origin.x, src_params.origin.y, write_buffer.data(),
+ read_buffer.data());
- const u32 bytes_per_pixel = regs.src_pitch / regs.x_count;
+ memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+}
- const std::size_t dst_size = Texture::CalculateSize(
- true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
- regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
+void MaxwellDMA::CopyPitchToBlockLinear() {
+ const auto& dst_params = regs.dst_params;
+ const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
+ const u32 width = dst_params.width;
+ const u32 height = dst_params.height;
+ const u32 depth = dst_params.depth;
+ const u32 block_height = dst_params.block_size.height;
+ const u32 block_depth = dst_params.block_size.depth;
+ const size_t dst_size =
+ CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
+ const size_t dst_layer_size =
+ CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth);
+
+ const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count;
+
+ if (read_buffer.size() < src_size) {
+ read_buffer.resize(src_size);
+ }
+ if (write_buffer.size() < dst_size) {
+ write_buffer.resize(dst_size);
+ }
- const std::size_t dst_layer_size = Texture::CalculateSize(
- true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
- regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
+ if (Settings::IsGPULevelExtreme()) {
+ memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
+ memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
+ } else {
+ memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size);
+ memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
+ }
- const std::size_t src_size = regs.src_pitch * regs.y_count;
+ // If the input is linear and the output is tiled, swizzle the input and copy it over.
+ if (regs.dst_params.block_size.depth > 0) {
+ ASSERT(dst_params.layer == 0);
+ SwizzleSliceToVoxel(regs.line_length_in, regs.line_count, regs.pitch_in, width, height,
+ bytes_per_pixel, block_height, block_depth, dst_params.origin.x,
+ dst_params.origin.y, write_buffer.data(), read_buffer.data());
+ } else {
+ SwizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_in, width, bytes_per_pixel,
+ write_buffer.data() + dst_layer_size * dst_params.layer, read_buffer.data(),
+ block_height, dst_params.origin.x, dst_params.origin.y);
+ }
- if (read_buffer.size() < src_size) {
- read_buffer.resize(src_size);
- }
+ memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+}
- if (write_buffer.size() < dst_size) {
- write_buffer.resize(dst_size);
- }
+void MaxwellDMA::FastCopyBlockLinearToPitch() {
+ const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in;
+ const size_t src_size = GOB_SIZE;
+ const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
+ u32 pos_x = regs.src_params.origin.x;
+ u32 pos_y = regs.src_params.origin.y;
+ const u64 offset = GetGOBOffset(regs.src_params.width, regs.src_params.height, pos_x, pos_y,
+ regs.src_params.block_size.height, bytes_per_pixel);
+    const u32 x_in_gob = GOB_SIZE_X / bytes_per_pixel;
+ pos_x = pos_x % x_in_gob;
+ pos_y = pos_y % 8;
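+    // GetGOBOffset locates the containing GOB, so the origin is reduced to coordinates
+    // inside that GOB (x_in_gob texels wide, 8 rows tall).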
+
+ if (read_buffer.size() < src_size) {
+ read_buffer.resize(src_size);
+ }
+ if (write_buffer.size() < dst_size) {
+ write_buffer.resize(dst_size);
+ }
- if (Settings::IsGPULevelExtreme()) {
- memory_manager.ReadBlock(source, read_buffer.data(), src_size);
- memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
- } else {
- memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
- memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
- }
+ if (Settings::IsGPULevelExtreme()) {
+ memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size);
+ memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
+ } else {
+ memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), src_size);
+ memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
+ }
- // If the input is linear and the output is tiled, swizzle the input and copy it over.
- Texture::SwizzleSubrect(
- regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, bytes_per_pixel,
- write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, read_buffer.data(),
- regs.dst_params.BlockHeight(), regs.dst_params.pos_x, regs.dst_params.pos_y);
+ UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width,
+ bytes_per_pixel, regs.src_params.block_size.height, pos_x, pos_y,
+ write_buffer.data(), read_buffer.data());
- memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
- }
+ memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
} // namespace Tegra::Engines
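
A minimal standalone sketch of the pitched-copy addressing used by CopyPitchToPitch,
assuming byte-addressed buffers and illustrative names (not yuzu's actual API):

    #include <cstddef>
    #include <cstring>

    // Copies line_count rows of line_length bytes between two pitched buffers.
    void CopyPitched(unsigned char* dst, const unsigned char* src, std::size_t pitch_out,
                     std::size_t pitch_in, std::size_t line_length, std::size_t line_count) {
        for (std::size_t line = 0; line < line_count; ++line) {
            std::memcpy(dst + line * pitch_out, src + line * pitch_in, line_length);
        }
    }
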
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 502dd8509..50f445efc 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -24,160 +24,190 @@ class MemoryManager;
namespace Tegra::Engines {
/**
- * This Engine is known as GK104_Copy. Documentation can be found in:
+ * This engine is known as gk104_copy. Documentation can be found in:
+ * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
* https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
*/
class MaxwellDMA final : public EngineInterface {
public:
- explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager);
- ~MaxwellDMA() = default;
-
- /// Write the value to the register identified by method.
- void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
-
- /// Write multiple values to the register identified by method.
- void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
- u32 methods_pending) override;
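+    /// A 40-bit GPU virtual address split across two 32-bit registers.
+    /// Illustration: upper = 0xAB and lower = 0x12345678 reconstruct to the
+    /// GPUVAddr 0xAB'12345678; only the low 8 bits of `upper` are used.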
+ struct PackedGPUVAddr {
+ u32 upper;
+ u32 lower;
+
+ constexpr operator GPUVAddr() const noexcept {
+ return (static_cast<GPUVAddr>(upper & 0xff) << 32) | lower;
+ }
+ };
+
+ union BlockSize {
+ BitField<0, 4, u32> width;
+ BitField<4, 4, u32> height;
+ BitField<8, 4, u32> depth;
+ BitField<12, 4, u32> gob_height;
+ };
+ static_assert(sizeof(BlockSize) == 4);
+
+ union Origin {
+ BitField<0, 16, u32> x;
+ BitField<16, 16, u32> y;
+ };
+ static_assert(sizeof(Origin) == 4);
+
+ struct Parameters {
+ BlockSize block_size;
+ u32 width;
+ u32 height;
+ u32 depth;
+ u32 layer;
+ Origin origin;
+ };
+ static_assert(sizeof(Parameters) == 24);
+
+ struct Semaphore {
+ PackedGPUVAddr address;
+ u32 payload;
+ };
+ static_assert(sizeof(Semaphore) == 12);
+
+ struct RenderEnable {
+ enum class Mode : u32 {
+ FALSE = 0,
+ TRUE = 1,
+ CONDITIONAL = 2,
+ RENDER_IF_EQUAL = 3,
+ RENDER_IF_NOT_EQUAL = 4,
+ };
- struct Regs {
- static constexpr std::size_t NUM_REGS = 0x1D6;
+ PackedGPUVAddr address;
+ BitField<0, 3, Mode> mode;
+ };
+ static_assert(sizeof(RenderEnable) == 12);
+
+ enum class PhysModeTarget : u32 {
+ LOCAL_FB = 0,
+ COHERENT_SYSMEM = 1,
+ NONCOHERENT_SYSMEM = 2,
+ };
+ using PhysMode = BitField<0, 2, PhysModeTarget>;
+
+ union LaunchDMA {
+ enum class DataTransferType : u32 {
+ NONE = 0,
+ PIPELINED = 1,
+ NON_PIPELINED = 2,
+ };
- struct Parameters {
- union {
- BitField<0, 4, u32> block_depth;
- BitField<4, 4, u32> block_height;
- BitField<8, 4, u32> block_width;
- };
- u32 size_x;
- u32 size_y;
- u32 size_z;
- u32 pos_z;
- union {
- BitField<0, 16, u32> pos_x;
- BitField<16, 16, u32> pos_y;
- };
+ enum class SemaphoreType : u32 {
+ NONE = 0,
+ RELEASE_ONE_WORD_SEMAPHORE = 1,
+ RELEASE_FOUR_WORD_SEMAPHORE = 2,
+ };
- u32 BlockHeight() const {
- return block_height.Value();
- }
+ enum class InterruptType : u32 {
+ NONE = 0,
+ BLOCKING = 1,
+ NON_BLOCKING = 2,
+ };
- u32 BlockDepth() const {
- return block_depth.Value();
- }
+ enum class MemoryLayout : u32 {
+ BLOCKLINEAR = 0,
+ PITCH = 1,
};
- static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
+ enum class Type : u32 {
+ VIRTUAL = 0,
+ PHYSICAL = 1,
+ };
- enum class ComponentMode : u32 {
- Src0 = 0,
- Src1 = 1,
- Src2 = 2,
- Src3 = 3,
- Const0 = 4,
- Const1 = 5,
- Zero = 6,
+ enum class SemaphoreReduction : u32 {
+ IMIN = 0,
+ IMAX = 1,
+ IXOR = 2,
+ IAND = 3,
+ IOR = 4,
+ IADD = 5,
+ INC = 6,
+ DEC = 7,
+ FADD = 0xA,
};
- enum class CopyMode : u32 {
- None = 0,
- Unk1 = 1,
- Unk2 = 2,
+ enum class SemaphoreReductionSign : u32 {
+ SIGNED = 0,
+ UNSIGNED = 1,
};
- enum class QueryMode : u32 {
- None = 0,
- Short = 1,
- Long = 2,
+ enum class BypassL2 : u32 {
+ USE_PTE_SETTING = 0,
+ FORCE_VOLATILE = 1,
};
- enum class QueryIntr : u32 {
- None = 0,
- Block = 1,
- NonBlock = 2,
+ BitField<0, 2, DataTransferType> data_transfer_type;
+ BitField<2, 1, u32> flush_enable;
+ BitField<3, 2, SemaphoreType> semaphore_type;
+ BitField<5, 2, InterruptType> interrupt_type;
+ BitField<7, 1, MemoryLayout> src_memory_layout;
+ BitField<8, 1, MemoryLayout> dst_memory_layout;
+ BitField<9, 1, u32> multi_line_enable;
+ BitField<10, 1, u32> remap_enable;
+ BitField<11, 1, u32> rmwdisable;
+ BitField<12, 1, Type> src_type;
+ BitField<13, 1, Type> dst_type;
+ BitField<14, 4, SemaphoreReduction> semaphore_reduction;
+ BitField<18, 1, SemaphoreReductionSign> semaphore_reduction_sign;
+ BitField<19, 1, u32> reduction_enable;
+ BitField<20, 1, BypassL2> bypass_l2;
+ };
+ static_assert(sizeof(LaunchDMA) == 4);
+
+ struct RemapConst {
+ enum Swizzle : u32 {
+ SRC_X = 0,
+ SRC_Y = 1,
+ SRC_Z = 2,
+ SRC_W = 3,
+ CONST_A = 4,
+ CONST_B = 5,
+ NO_WRITE = 6,
};
- union {
- struct {
- INSERT_UNION_PADDING_WORDS(0xC0);
-
- struct {
- union {
- BitField<0, 2, CopyMode> copy_mode;
- BitField<2, 1, u32> flush;
-
- BitField<3, 2, QueryMode> query_mode;
- BitField<5, 2, QueryIntr> query_intr;
-
- BitField<7, 1, u32> is_src_linear;
- BitField<8, 1, u32> is_dst_linear;
-
- BitField<9, 1, u32> enable_2d;
- BitField<10, 1, u32> enable_swizzle;
- };
- } exec;
-
- INSERT_UNION_PADDING_WORDS(0x3F);
-
- struct {
- u32 address_high;
- u32 address_low;
-
- GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
- }
- } src_address;
-
- struct {
- u32 address_high;
- u32 address_low;
-
- GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
- }
- } dst_address;
-
- u32 src_pitch;
- u32 dst_pitch;
- u32 x_count;
- u32 y_count;
-
- INSERT_UNION_PADDING_WORDS(0xB8);
-
- u32 const0;
- u32 const1;
- union {
- BitField<0, 4, ComponentMode> component0;
- BitField<4, 4, ComponentMode> component1;
- BitField<8, 4, ComponentMode> component2;
- BitField<12, 4, ComponentMode> component3;
- BitField<16, 2, u32> component_size;
- BitField<20, 3, u32> src_num_components;
- BitField<24, 3, u32> dst_num_components;
-
- u32 SrcBytePerPixel() const {
- return src_num_components.Value() * component_size.Value();
- }
- u32 DstBytePerPixel() const {
- return dst_num_components.Value() * component_size.Value();
- }
- } swizzle_config;
+ PackedGPUVAddr address;
- Parameters dst_params;
+ union {
+ BitField<0, 3, Swizzle> dst_x;
+ BitField<4, 3, Swizzle> dst_y;
+ BitField<8, 3, Swizzle> dst_z;
+ BitField<12, 3, Swizzle> dst_w;
+ BitField<16, 2, u32> component_size_minus_one;
+ BitField<20, 2, u32> num_src_components_minus_one;
+ BitField<24, 2, u32> num_dst_components_minus_one;
+ };
+ };
+ static_assert(sizeof(RemapConst) == 12);
- INSERT_UNION_PADDING_WORDS(1);
+ explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager);
+ ~MaxwellDMA() = default;
- Parameters src_params;
+ /// Write the value to the register identified by method.
+ void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
- INSERT_UNION_PADDING_WORDS(0x13);
- };
- std::array<u32, NUM_REGS> reg_array;
- };
- } regs{};
+ /// Write multiple values to the register identified by method.
+ void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
+ u32 methods_pending) override;
private:
+ /// Performs the copy from the source buffer to the destination buffer as configured in the
+ /// registers.
+ void Launch();
+
+ void CopyPitchToPitch();
+
+ void CopyBlockLinearToPitch();
+
+ void CopyPitchToBlockLinear();
+
+ void FastCopyBlockLinearToPitch();
+
Core::System& system;
MemoryManager& memory_manager;
@@ -185,28 +215,58 @@ private:
std::vector<u8> read_buffer;
std::vector<u8> write_buffer;
- /// Performs the copy from the source buffer to the destination buffer as configured in the
- /// registers.
- void HandleCopy();
-};
+ static constexpr std::size_t NUM_REGS = 0x800;
+ struct Regs {
+ union {
+ struct {
+ u32 reserved[0x40];
+ u32 nop;
+ u32 reserved01[0xf];
+ u32 pm_trigger;
+ u32 reserved02[0x3f];
+ Semaphore semaphore;
+ u32 reserved03[0x2];
+ RenderEnable render_enable;
+ PhysMode src_phys_mode;
+ PhysMode dst_phys_mode;
+ u32 reserved04[0x26];
+ LaunchDMA launch_dma;
+ u32 reserved05[0x3f];
+ PackedGPUVAddr offset_in;
+ PackedGPUVAddr offset_out;
+ u32 pitch_in;
+ u32 pitch_out;
+ u32 line_length_in;
+ u32 line_count;
+ u32 reserved06[0xb8];
+ RemapConst remap_const;
+ Parameters dst_params;
+ u32 reserved07[0x1];
+ Parameters src_params;
+ u32 reserved08[0x275];
+ u32 pm_trigger_end;
+ u32 reserved09[0x3ba];
+ };
+ std::array<u32, NUM_REGS> reg_array;
+ };
+ } regs{};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
-ASSERT_REG_POSITION(exec, 0xC0);
-ASSERT_REG_POSITION(src_address, 0x100);
-ASSERT_REG_POSITION(dst_address, 0x102);
-ASSERT_REG_POSITION(src_pitch, 0x104);
-ASSERT_REG_POSITION(dst_pitch, 0x105);
-ASSERT_REG_POSITION(x_count, 0x106);
-ASSERT_REG_POSITION(y_count, 0x107);
-ASSERT_REG_POSITION(const0, 0x1C0);
-ASSERT_REG_POSITION(const1, 0x1C1);
-ASSERT_REG_POSITION(swizzle_config, 0x1C2);
-ASSERT_REG_POSITION(dst_params, 0x1C3);
-ASSERT_REG_POSITION(src_params, 0x1CA);
+ ASSERT_REG_POSITION(launch_dma, 0xC0);
+ ASSERT_REG_POSITION(offset_in, 0x100);
+ ASSERT_REG_POSITION(offset_out, 0x102);
+ ASSERT_REG_POSITION(pitch_in, 0x104);
+ ASSERT_REG_POSITION(pitch_out, 0x105);
+ ASSERT_REG_POSITION(line_length_in, 0x106);
+ ASSERT_REG_POSITION(line_count, 0x107);
+ ASSERT_REG_POSITION(remap_const, 0x1C0);
+ ASSERT_REG_POSITION(dst_params, 0x1C3);
+ ASSERT_REG_POSITION(src_params, 0x1CA);
#undef ASSERT_REG_POSITION
+};
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index e7cb87589..d374b73cf 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -661,6 +661,10 @@ union Instruction {
constexpr Instruction(u64 value) : value{value} {}
constexpr Instruction(const Instruction& instr) : value(instr.value) {}
+ constexpr bool Bit(u64 offset) const {
+ return ((value >> offset) & 1) != 0;
+ }
+
BitField<0, 8, Register> gpr0;
BitField<8, 8, Register> gpr8;
union {
@@ -1874,7 +1878,9 @@ public:
HSETP2_C,
HSETP2_R,
HSETP2_IMM,
+ HSET2_C,
HSET2_R,
+ HSET2_IMM,
POPC_C,
POPC_R,
POPC_IMM,
@@ -2194,7 +2200,9 @@ private:
INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
+ INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),
INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
+ INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 8b2a6a42c..de6991ef6 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -5,15 +5,10 @@
#pragma once
#include <algorithm>
-#include <array>
-#include <memory>
#include <queue>
-#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
-#include "core/memory.h"
-#include "core/settings.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
@@ -79,8 +74,6 @@ public:
}
void WaitPendingFences() {
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait()) {
@@ -88,8 +81,8 @@ public:
}
PopAsyncFlushes();
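// A semaphore fence releases by writing its payload to GPU memory, while a
// syncpoint fence releases by incrementing the host syncpoint below.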
if (current_fence->IsSemaphore()) {
- memory_manager.template Write<u32>(current_fence->GetAddress(),
- current_fence->GetPayload());
+ gpu_memory.template Write<u32>(current_fence->GetAddress(),
+ current_fence->GetPayload());
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
@@ -98,13 +91,13 @@ public:
}
protected:
- FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- TTextureCache& texture_cache, TTBufferCache& buffer_cache,
- TQueryCache& query_cache)
- : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache},
- buffer_cache{buffer_cache}, query_cache{query_cache} {}
+ explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
+ TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
+ TQueryCache& query_cache_)
+ : rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu.MemoryManager()},
+ texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
- virtual ~FenceManager() {}
+ virtual ~FenceManager() = default;
/// Creates a sync point fence interface; does not create a backend fence if 'is_stubbed'
/// is true.
@@ -118,16 +111,15 @@ protected:
/// Waits until a fence has been signalled by the host GPU.
virtual void WaitFence(TFence& fence) = 0;
- Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
+ Tegra::GPU& gpu;
+ Tegra::MemoryManager& gpu_memory;
TTextureCache& texture_cache;
TTBufferCache& buffer_cache;
TQueryCache& query_cache;
private:
void TryReleasePendingFences() {
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait() && !IsFenceSignaled(current_fence)) {
@@ -135,8 +127,8 @@ private:
}
PopAsyncFlushes();
if (current_fence->IsSemaphore()) {
- memory_manager.template Write<u32>(current_fence->GetAddress(),
- current_fence->GetPayload());
+ gpu_memory.template Write<u32>(current_fence->GetAddress(),
+ current_fence->GetPayload());
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 8eb017f65..4bb9256e9 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <chrono>
+
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/core.h"
@@ -18,26 +20,35 @@
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_base.h"
+#include "video_core/shader_notify.h"
#include "video_core/video_core.h"
namespace Tegra {
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
-GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async)
- : system{system}, renderer{std::move(renderer_)}, is_async{is_async} {
- auto& rasterizer{renderer->Rasterizer()};
- memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
- dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this);
- maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
- fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
- kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
- maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
- kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
-}
+GPU::GPU(Core::System& system_, bool is_async_)
+ : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
+ dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
+ maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
+ fermi_2d{std::make_unique<Engines::Fermi2D>()},
+ kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
+ maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
+ kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
+ shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {}
GPU::~GPU() = default;
+void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
+ renderer = std::move(renderer_);
+
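+    // The renderer is created after the GPU now, so the rasterizer is late-bound here
+    // to the memory manager and to every engine that caches or draws GPU state.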
+ VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer();
+ memory_manager->BindRasterizer(rasterizer);
+ maxwell_3d->BindRasterizer(rasterizer);
+ fermi_2d->BindRasterizer(rasterizer);
+ kepler_compute->BindRasterizer(rasterizer);
+}
+
Engines::Maxwell3D& GPU::Maxwell3D() {
return *maxwell_3d;
}
@@ -77,7 +88,7 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) {
}
MICROPROFILE_SCOPE(GPU_wait);
std::unique_lock lock{sync_mutex};
- sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; });
+ sync_cv.wait(lock, [=, this] { return syncpoints[syncpoint_id].load() >= value; });
}
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
@@ -154,9 +165,8 @@ u64 GPU::GetTicks() const {
constexpr u64 gpu_ticks_num = 384;
constexpr u64 gpu_ticks_den = 625;
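// 384 / 625 ticks per nanosecond corresponds to the 614.4 MHz GPU clock.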
- const u64 cpu_ticks = system.CoreTiming().GetTicks();
- u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
- if (Settings::values.use_fast_gpu_time) {
+ u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
+ if (Settings::values.use_fast_gpu_time.GetValue()) {
nanoseconds /= 256;
}
const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
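
As a hedged reference for the conversion above (the num/rem split mirrors the visible
code and avoids overflowing 64 bits for large nanosecond counts; names are illustrative):

    #include <cstdint>

    std::uint64_t NsToGpuTicks(std::uint64_t ns) {
        constexpr std::uint64_t num = 384; // 384 / 625 ticks per ns = 614.4 MHz
        constexpr std::uint64_t den = 625;
        const std::uint64_t whole = ns / den; // whole multiples of den
        const std::uint64_t rem = ns % den;   // remainder, scaled without overflow
        return whole * num + rem * num / den;
    }
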
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index a1b4c305c..2d15d1c6f 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -33,59 +33,68 @@ class System;
namespace VideoCore {
class RendererBase;
+class ShaderNotify;
} // namespace VideoCore
namespace Tegra {
enum class RenderTargetFormat : u32 {
NONE = 0x0,
- RGBA32_FLOAT = 0xC0,
- RGBA32_UINT = 0xC2,
- RGBA16_UNORM = 0xC6,
- RGBA16_SNORM = 0xC7,
- RGBA16_UINT = 0xC9,
- RGBA16_FLOAT = 0xCA,
- RG32_FLOAT = 0xCB,
- RG32_UINT = 0xCD,
- RGBX16_FLOAT = 0xCE,
- BGRA8_UNORM = 0xCF,
- BGRA8_SRGB = 0xD0,
- RGB10_A2_UNORM = 0xD1,
- RGBA8_UNORM = 0xD5,
- RGBA8_SRGB = 0xD6,
- RGBA8_SNORM = 0xD7,
- RGBA8_UINT = 0xD9,
- RG16_UNORM = 0xDA,
- RG16_SNORM = 0xDB,
- RG16_SINT = 0xDC,
- RG16_UINT = 0xDD,
- RG16_FLOAT = 0xDE,
- R11G11B10_FLOAT = 0xE0,
+ R32B32G32A32_FLOAT = 0xC0,
+ R32G32B32A32_SINT = 0xC1,
+ R32G32B32A32_UINT = 0xC2,
+ R16G16B16A16_UNORM = 0xC6,
+ R16G16B16A16_SNORM = 0xC7,
+ R16G16B16A16_SINT = 0xC8,
+ R16G16B16A16_UINT = 0xC9,
+ R16G16B16A16_FLOAT = 0xCA,
+ R32G32_FLOAT = 0xCB,
+ R32G32_SINT = 0xCC,
+ R32G32_UINT = 0xCD,
+ R16G16B16X16_FLOAT = 0xCE,
+ B8G8R8A8_UNORM = 0xCF,
+ B8G8R8A8_SRGB = 0xD0,
+ A2B10G10R10_UNORM = 0xD1,
+ A2B10G10R10_UINT = 0xD2,
+ A8B8G8R8_UNORM = 0xD5,
+ A8B8G8R8_SRGB = 0xD6,
+ A8B8G8R8_SNORM = 0xD7,
+ A8B8G8R8_SINT = 0xD8,
+ A8B8G8R8_UINT = 0xD9,
+ R16G16_UNORM = 0xDA,
+ R16G16_SNORM = 0xDB,
+ R16G16_SINT = 0xDC,
+ R16G16_UINT = 0xDD,
+ R16G16_FLOAT = 0xDE,
+ B10G11R11_FLOAT = 0xE0,
R32_SINT = 0xE3,
R32_UINT = 0xE4,
R32_FLOAT = 0xE5,
- B5G6R5_UNORM = 0xE8,
- BGR5A1_UNORM = 0xE9,
- RG8_UNORM = 0xEA,
- RG8_SNORM = 0xEB,
- RG8_UINT = 0xED,
+ R5G6B5_UNORM = 0xE8,
+ A1R5G5B5_UNORM = 0xE9,
+ R8G8_UNORM = 0xEA,
+ R8G8_SNORM = 0xEB,
+ R8G8_SINT = 0xEC,
+ R8G8_UINT = 0xED,
R16_UNORM = 0xEE,
R16_SNORM = 0xEF,
R16_SINT = 0xF0,
R16_UINT = 0xF1,
R16_FLOAT = 0xF2,
R8_UNORM = 0xF3,
+ R8_SNORM = 0xF4,
+ R8_SINT = 0xF5,
R8_UINT = 0xF6,
};
enum class DepthFormat : u32 {
- Z32_FLOAT = 0xA,
- Z16_UNORM = 0x13,
- S8_Z24_UNORM = 0x14,
- Z24_X8_UNORM = 0x15,
- Z24_S8_UNORM = 0x16,
- Z24_C8_UNORM = 0x18,
- Z32_S8_X24_FLOAT = 0x19,
+ D32_FLOAT = 0xA,
+ D16_UNORM = 0x13,
+ S8_UINT_Z24_UNORM = 0x14,
+ D24X8_UNORM = 0x15,
+ D24S8_UNORM = 0x16,
+ D24C8_UNORM = 0x18,
+ D32_FLOAT_S8X24_UINT = 0x19,
};
struct CommandListHeader;
@@ -96,9 +105,9 @@ class DebugContext;
*/
struct FramebufferConfig {
enum class PixelFormat : u32 {
- ABGR8 = 1,
- RGB565 = 4,
- BGRA8 = 5,
+ A8B8G8R8_UNORM = 1,
+ RGB565_UNORM = 4,
+ B8G8R8A8_UNORM = 5,
};
VAddr address;
@@ -133,11 +142,6 @@ class MemoryManager;
class GPU {
public:
- explicit GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
- bool is_async);
-
- virtual ~GPU();
-
struct MethodCall {
u32 method{};
u32 argument{};
@@ -153,6 +157,12 @@ public:
method_count(method_count) {}
};
+ explicit GPU(Core::System& system, bool is_async);
+ virtual ~GPU();
+
+ /// Binds a renderer to the GPU.
+ void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
+
/// Calls a GPU method.
void CallMethod(const MethodCall& method_call);
@@ -207,6 +217,14 @@ public:
return *renderer;
}
+ VideoCore::ShaderNotify& ShaderNotify() {
+ return *shader_notify;
+ }
+
+ const VideoCore::ShaderNotify& ShaderNotify() const {
+ return *shader_notify;
+ }
+
// Waits for the GPU to finish working
virtual void WaitIdle() const = 0;
@@ -235,7 +253,7 @@ public:
const Tegra::DmaPusher& DmaPusher() const;
struct Regs {
- static constexpr size_t NUM_REGS = 0x100;
+ static constexpr size_t NUM_REGS = 0x40;
union {
struct {
@@ -254,7 +272,7 @@ public:
u32 semaphore_trigger;
INSERT_UNION_PADDING_WORDS(0xC);
- // The puser and the puller share the reference counter, the pusher only has read
+ // The pusher and the puller share the reference counter, the pusher only has read
// access
u32 reference_count;
INSERT_UNION_PADDING_WORDS(0x5);
@@ -284,6 +302,12 @@ public:
/// core timing events.
virtual void Start() = 0;
+    /// Obtain the CPU context
+    virtual void ObtainContext() = 0;
+
+    /// Release the CPU context
+    virtual void ReleaseContext() = 0;
+
/// Push GPU command entries to be processed
virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
@@ -322,13 +346,12 @@ private:
bool ExecuteMethodOnEngine(u32 method);
protected:
- std::unique_ptr<Tegra::DmaPusher> dma_pusher;
Core::System& system;
+ std::unique_ptr<Tegra::MemoryManager> memory_manager;
+ std::unique_ptr<Tegra::DmaPusher> dma_pusher;
std::unique_ptr<VideoCore::RendererBase> renderer;
private:
- std::unique_ptr<Tegra::MemoryManager> memory_manager;
-
/// Mapping of command subchannels to their bound engine ids
std::array<EngineID, 8> bound_engines = {};
/// 3D engine
@@ -341,6 +364,8 @@ private:
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
/// Inline memory engine
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+ /// Shader build notifier
+ std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 53305ab43..70a3d5738 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -10,17 +10,22 @@
namespace VideoCommon {
-GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_,
- std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
- : GPU(system, std::move(renderer_), true), gpu_thread{system},
- cpu_context(renderer->GetRenderWindow().CreateSharedContext()),
- gpu_context(std::move(context)) {}
+GPUAsynch::GPUAsynch(Core::System& system) : GPU{system, true}, gpu_thread{system} {}
GPUAsynch::~GPUAsynch() = default;
void GPUAsynch::Start() {
+ gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
+ cpu_context = renderer->GetRenderWindow().CreateSharedContext();
cpu_context->MakeCurrent();
- gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher);
+}
+
+void GPUAsynch::ObtainContext() {
+ cpu_context->MakeCurrent();
+}
+
+void GPUAsynch::ReleaseContext() {
+ cpu_context->DoneCurrent();
}
void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 517658612..f89c855a5 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -20,11 +20,12 @@ namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU asynchronously
class GPUAsynch final : public Tegra::GPU {
public:
- explicit GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
- std::unique_ptr<Core::Frontend::GraphicsContext>&& context);
+ explicit GPUAsynch(Core::System& system);
~GPUAsynch() override;
void Start() override;
+ void ObtainContext() override;
+ void ReleaseContext() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
@@ -40,7 +41,6 @@ protected:
private:
GPUThread::ThreadManager gpu_thread;
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
- std::unique_ptr<Core::Frontend::GraphicsContext> gpu_context;
};
} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 6f38a672a..1ca47ddef 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -7,14 +7,18 @@
namespace VideoCommon {
-GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
- std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
- : GPU(system, std::move(renderer), false), context{std::move(context)} {}
+GPUSynch::GPUSynch(Core::System& system) : GPU{system, false} {}
GPUSynch::~GPUSynch() = default;
-void GPUSynch::Start() {
- context->MakeCurrent();
+void GPUSynch::Start() {}
+
+void GPUSynch::ObtainContext() {
+ renderer->Context().MakeCurrent();
+}
+
+void GPUSynch::ReleaseContext() {
+ renderer->Context().DoneCurrent();
}
void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 4a6e9a01d..297258cb1 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -19,11 +19,12 @@ namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU synchronously
class GPUSynch final : public Tegra::GPU {
public:
- explicit GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
- std::unique_ptr<Core::Frontend::GraphicsContext>&& context);
+ explicit GPUSynch(Core::System& system);
~GPUSynch() override;
void Start() override;
+ void ObtainContext() override;
+ void ReleaseContext() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
@@ -34,9 +35,6 @@ public:
protected:
void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
[[maybe_unused]] u32 value) const override {}
-
-private:
- std::unique_ptr<Core::Frontend::GraphicsContext> context;
};
} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index c3bb4fe06..bf761abf2 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -4,6 +4,7 @@
#include "common/assert.h"
#include "common/microprofile.h"
+#include "common/thread.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/settings.h"
@@ -18,7 +19,11 @@ namespace VideoCommon::GPUThread {
static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
SynchState& state) {
- MicroProfileOnThreadCreate("GpuThread");
+ std::string name = "yuzu:GPU";
+ MicroProfileOnThreadCreate(name.c_str());
+ Common::SetCurrentThreadName(name.c_str());
+ Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
+ system.RegisterHostThread();
// Wait for first GPU command before acquiring the window context
while (state.queue.Empty())
@@ -39,9 +44,9 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
- } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
+ } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
renderer.Rasterizer().ReleaseFences();
- } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) {
+ } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
system.GPU().TickWork();
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
new file mode 100644
index 000000000..aa62363a7
--- /dev/null
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -0,0 +1,43 @@
+set(SHADER_FILES
+ opengl_present.frag
+ opengl_present.vert
+)
+
+set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include)
+set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)
+
+set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders)
+add_custom_command(
+ OUTPUT
+ ${SHADER_DIR}
+ COMMAND
+ ${CMAKE_COMMAND} -E make_directory ${SHADER_DIR}
+)
+
+set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in)
+set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake)
+
+foreach(FILENAME IN ITEMS ${SHADER_FILES})
+ string(REPLACE "." "_" SHADER_NAME ${FILENAME})
+ set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
+ set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
+ add_custom_command(
+ OUTPUT
+ ${HEADER_FILE}
+ COMMAND
+ ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE}
+ MAIN_DEPENDENCY
+ ${SOURCE_FILE}
+ DEPENDS
+ ${HEADER_GENERATOR}
+ ${INPUT_FILE}
+ )
+    list(APPEND SHADER_HEADERS ${HEADER_FILE})
+endforeach()
+
+add_custom_target(host_shaders
+ DEPENDS
+ ${SHADER_HEADERS}
+ SOURCES
+ ${SHADER_FILES}
+)
diff --git a/src/video_core/host_shaders/StringShaderHeader.cmake b/src/video_core/host_shaders/StringShaderHeader.cmake
new file mode 100644
index 000000000..368bce0ed
--- /dev/null
+++ b/src/video_core/host_shaders/StringShaderHeader.cmake
@@ -0,0 +1,11 @@
+set(SOURCE_FILE ${CMAKE_ARGV3})
+set(HEADER_FILE ${CMAKE_ARGV4})
+set(INPUT_FILE ${CMAKE_ARGV5})
+
+get_filename_component(CONTENTS_NAME ${SOURCE_FILE} NAME)
+string(REPLACE "." "_" CONTENTS_NAME ${CONTENTS_NAME})
+string(TOUPPER ${CONTENTS_NAME} CONTENTS_NAME)
+
+file(READ ${SOURCE_FILE} CONTENTS)
+
+configure_file(${INPUT_FILE} ${HEADER_FILE} @ONLY)
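+
+# For example, opengl_present.frag produces a header that defines
+# HostShaders::OPENGL_PRESENT_FRAG holding the shader source as a string_view.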
diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag
new file mode 100644
index 000000000..8a4cb024b
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_present.frag
@@ -0,0 +1,10 @@
+#version 430 core
+
+layout (location = 0) in vec2 frag_tex_coord;
+layout (location = 0) out vec4 color;
+
+layout (binding = 0) uniform sampler2D color_texture;
+
+void main() {
+ color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
+}
diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert
new file mode 100644
index 000000000..2235d31a4
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_present.vert
@@ -0,0 +1,24 @@
+#version 430 core
+
+out gl_PerVertex {
+ vec4 gl_Position;
+};
+
+layout (location = 0) in vec2 vert_position;
+layout (location = 1) in vec2 vert_tex_coord;
+layout (location = 0) out vec2 frag_tex_coord;
+
+// This is a truncated 3x3 matrix for 2D transformations:
+// The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
+// The third column performs translation.
+// The third row could be used for projection, which we don't need in 2D. It is therefore
+// implicitly assumed to be [0, 0, 1].
+layout (location = 0) uniform mat3x2 modelview_matrix;
+
+void main() {
+ // Multiply input position by the rotscale part of the matrix and then manually translate by
+ // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
+ // to `vec3(vert_position.xy, 1.0)`
+ gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0);
+ frag_tex_coord = vert_tex_coord;
+}
diff --git a/src/video_core/host_shaders/source_shader.h.in b/src/video_core/host_shaders/source_shader.h.in
new file mode 100644
index 000000000..ccdb0d2a9
--- /dev/null
+++ b/src/video_core/host_shaders/source_shader.h.in
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <string_view>
+
+namespace HostShaders {
+
+constexpr std::string_view @CONTENTS_NAME@ = R"(@CONTENTS@)";
+
+} // namespace HostShaders
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index 89077a2d8..cd21a2112 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -2,32 +2,78 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <cstring>
+#include <optional>
+#include <boost/container_hash/hash.hpp>
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/settings.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/macro/macro.h"
+#include "video_core/macro/macro_hle.h"
#include "video_core/macro/macro_interpreter.h"
#include "video_core/macro/macro_jit_x64.h"
namespace Tegra {
+MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d)
+ : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {}
+
+MacroEngine::~MacroEngine() = default;
+
void MacroEngine::AddCode(u32 method, u32 data) {
uploaded_macro_code[method].push_back(data);
}
-void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
+void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
+ const std::vector<u32>& parameters) {
auto compiled_macro = macro_cache.find(method);
if (compiled_macro != macro_cache.end()) {
- compiled_macro->second->Execute(parameters, method);
+ const auto& cache_info = compiled_macro->second;
+ if (cache_info.has_hle_program) {
+ cache_info.hle_program->Execute(parameters, method);
+ } else {
+ cache_info.lle_program->Execute(parameters, method);
+ }
} else {
// Macro not compiled, check if it's uploaded and if so, compile it
- auto macro_code = uploaded_macro_code.find(method);
+ std::optional<u32> mid_method;
+ const auto macro_code = uploaded_macro_code.find(method);
if (macro_code == uploaded_macro_code.end()) {
- UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method);
- return;
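+            // Some titles call into the middle of an uploaded macro. Search for an upload
+            // whose code range contains this method and rebase against it.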
+ for (const auto& [method_base, code] : uploaded_macro_code) {
+ if (method >= method_base && (method - method_base) < code.size()) {
+ mid_method = method_base;
+ break;
+ }
+ }
+ if (!mid_method.has_value()) {
+ UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method);
+ return;
+ }
+ }
+ auto& cache_info = macro_cache[method];
+
+ if (!mid_method.has_value()) {
+ cache_info.lle_program = Compile(macro_code->second);
+ cache_info.hash = boost::hash_value(macro_code->second);
+ } else {
+ const auto& macro_cached = uploaded_macro_code[mid_method.value()];
+ const auto rebased_method = method - mid_method.value();
+ auto& code = uploaded_macro_code[method];
+ code.resize(macro_cached.size() - rebased_method);
+ std::memcpy(code.data(), macro_cached.data() + rebased_method,
+ code.size() * sizeof(u32));
+ cache_info.hash = boost::hash_value(code);
+ cache_info.lle_program = Compile(code);
+ }
+
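+        // The code hash identifies the macro: a known hash executes through its
+        // hand-written HLE replacement, anything else falls back to the compiled program.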
+ auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
+ if (hle_program.has_value()) {
+ cache_info.has_hle_program = true;
+ cache_info.hle_program = std::move(hle_program.value());
+ cache_info.hle_program->Execute(parameters, method);
+ } else {
+ cache_info.lle_program->Execute(parameters, method);
}
- macro_cache[method] = Compile(macro_code->second);
- macro_cache[method]->Execute(parameters, method);
}
}
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h
index b76ed891f..31ee3440a 100644
--- a/src/video_core/macro/macro.h
+++ b/src/video_core/macro/macro.h
@@ -11,9 +11,11 @@
#include "common/common_types.h"
namespace Tegra {
+
namespace Engines {
class Maxwell3D;
}
+
namespace Macro {
constexpr std::size_t NUM_MACRO_REGISTERS = 8;
enum class Operation : u32 {
@@ -94,33 +96,45 @@ union MethodAddress {
} // namespace Macro
+class HLEMacro;
+
class CachedMacro {
public:
virtual ~CachedMacro() = default;
/**
* Executes the macro code with the specified input parameters.
- * @param code The macro byte code to execute
+ *
* @param parameters The parameters of the macro
+ * @param method The method to execute
*/
virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0;
};
class MacroEngine {
public:
- virtual ~MacroEngine() = default;
+ explicit MacroEngine(Engines::Maxwell3D& maxwell3d);
+ virtual ~MacroEngine();
// Stores the uploaded macro code so it can be compiled when called.
void AddCode(u32 method, u32 data);
// Compiles the macro if it's not in the cache, then executes the compiled macro
- void Execute(u32 method, const std::vector<u32>& parameters);
+ void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters);
protected:
virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;
private:
- std::unordered_map<u32, std::unique_ptr<CachedMacro>> macro_cache;
+ struct CacheInfo {
+ std::unique_ptr<CachedMacro> lle_program{};
+ std::unique_ptr<CachedMacro> hle_program{};
+ u64 hash{};
+ bool has_hle_program{};
+ };
+
+ std::unordered_map<u32, CacheInfo> macro_cache;
std::unordered_map<u32, std::vector<u32>> uploaded_macro_code;
+ std::unique_ptr<HLEMacro> hle_macros;
};
std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
new file mode 100644
index 000000000..df00b57df
--- /dev/null
+++ b/src/video_core/macro/macro_hle.cpp
@@ -0,0 +1,109 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <vector>
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/macro/macro_hle.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace Tegra {
+
+namespace {
+// HLE'd functions
+void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
+ const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
+
+ maxwell3d.regs.draw.topology.Assign(
+ static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0x3ffffff));
+ maxwell3d.regs.vb_base_instance = parameters[5];
+ maxwell3d.mme_draw.instance_count = instance_count;
+ maxwell3d.regs.vb_element_base = parameters[3];
+ maxwell3d.regs.index_array.count = parameters[1];
+ maxwell3d.regs.index_array.first = parameters[4];
+
+ if (maxwell3d.ShouldExecute()) {
+ maxwell3d.Rasterizer().Draw(true, true);
+ }
+ maxwell3d.regs.index_array.count = 0;
+ maxwell3d.mme_draw.instance_count = 0;
+ maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+}
+
+void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
+ const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
+
+ maxwell3d.regs.vertex_buffer.first = parameters[3];
+ maxwell3d.regs.vertex_buffer.count = parameters[1];
+ maxwell3d.regs.vb_base_instance = parameters[4];
+ maxwell3d.regs.draw.topology.Assign(
+ static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
+ maxwell3d.mme_draw.instance_count = count;
+
+ if (maxwell3d.ShouldExecute()) {
+ maxwell3d.Rasterizer().Draw(false, true);
+ }
+ maxwell3d.regs.vertex_buffer.count = 0;
+ maxwell3d.mme_draw.instance_count = 0;
+ maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+}
+
+void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
+ const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
+ const u32 element_base = parameters[4];
+ const u32 base_instance = parameters[5];
+ maxwell3d.regs.index_array.first = parameters[3];
+ maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base?
+ maxwell3d.regs.index_array.count = parameters[1];
+ maxwell3d.regs.vb_element_base = element_base;
+ maxwell3d.regs.vb_base_instance = base_instance;
+ maxwell3d.mme_draw.instance_count = instance_count;
+ maxwell3d.CallMethodFromMME(0x8e3, 0x640);
+ maxwell3d.CallMethodFromMME(0x8e4, element_base);
+ maxwell3d.CallMethodFromMME(0x8e5, base_instance);
+ maxwell3d.regs.draw.topology.Assign(
+ static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
+ if (maxwell3d.ShouldExecute()) {
+ maxwell3d.Rasterizer().Draw(true, true);
+ }
+ maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base?
+ maxwell3d.regs.index_array.count = 0;
+ maxwell3d.regs.vb_element_base = 0x0;
+ maxwell3d.regs.vb_base_instance = 0x0;
+ maxwell3d.mme_draw.instance_count = 0;
+ maxwell3d.CallMethodFromMME(0x8e3, 0x640);
+ maxwell3d.CallMethodFromMME(0x8e4, 0x0);
+ maxwell3d.CallMethodFromMME(0x8e5, 0x0);
+ maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+}
+} // Anonymous namespace
+
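+// Maps the boost hash of a macro's code to its HLE replacement; each function name
+// spells out the 64-bit hash it implements.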
+constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
+ {0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
+ {0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD},
+ {0x0217920100488FF7, &HLE_0217920100488FF7},
+}};
+
+HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
+HLEMacro::~HLEMacro() = default;
+
+std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const {
+ const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(),
+ [hash](const auto& pair) { return pair.first == hash; });
+ if (it == hle_funcs.end()) {
+ return std::nullopt;
+ }
+ return std::make_unique<HLEMacroImpl>(maxwell3d, it->second);
+}
+
+HLEMacroImpl::~HLEMacroImpl() = default;
+
+HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func)
+ : maxwell3d(maxwell3d), func(func) {}
+
+void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
+ func(maxwell3d, parameters);
+}
+
+} // namespace Tegra
diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h
new file mode 100644
index 000000000..37af875a0
--- /dev/null
+++ b/src/video_core/macro/macro_hle.h
@@ -0,0 +1,44 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/macro/macro.h"
+
+namespace Tegra {
+
+namespace Engines {
+class Maxwell3D;
+}
+
+using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
+
+class HLEMacro {
+public:
+ explicit HLEMacro(Engines::Maxwell3D& maxwell3d);
+ ~HLEMacro();
+
+ std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const;
+
+private:
+ Engines::Maxwell3D& maxwell3d;
+};
+
+class HLEMacroImpl : public CachedMacro {
+public:
+ explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func);
+ ~HLEMacroImpl();
+
+ void Execute(const std::vector<u32>& parameters, u32 method) override;
+
+private:
+ Engines::Maxwell3D& maxwell3d;
+ HLEFunction func;
+};
+
+} // namespace Tegra
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index 5edff27aa..bd01fd1f2 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -11,7 +11,8 @@
MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
namespace Tegra {
-MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
+MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d)
+ : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
@@ -33,7 +34,6 @@ void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 metho
this->parameters = std::make_unique<u32[]>(num_parameters);
}
std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32));
- this->num_parameters = num_parameters;
// Execute the code until we hit an exit condition.
bool keep_executing = true;
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index 11c1cc3be..954b87515 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -14,27 +14,22 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
namespace Tegra {
-static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9;
-static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10;
-static const Xbyak::Reg64 STATE = Xbyak::util::r11;
-static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12;
-static const Xbyak::Reg32 RESULT = Xbyak::util::r13d;
-static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13;
-static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
-static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14;
-static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
+constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx;
+constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp;
+constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
+constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
+constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
- PARAMETERS,
- REGISTERS,
STATE,
- NEXT_PARAMETER,
RESULT,
+ PARAMETERS,
METHOD_ADDRESS,
BRANCH_HOLDER,
});
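
The pinned registers move from a set that included r9, r10, and r11, which are caller-saved (volatile) under both calling conventions, to registers that are callee-saved under both System V and Windows x64. Helper calls back into the emulator therefore preserve them automatically, which presumably empties PersistentCallerSavedRegs() and removes spill traffic around every far call. A small self-check built only on standard ABI facts, not project code:

```cpp
#include <cstring>

// Callee-saved general-purpose registers per ABI (standard x86-64 facts).
constexpr const char* kSysVCalleeSaved[] = {"rbx", "rbp", "r12", "r13", "r14", "r15"};
constexpr const char* kWin64CalleeSaved[] = {"rbx", "rbp", "rdi", "rsi",
                                             "r12", "r13", "r14", "r15"};

bool IsCalleeSavedEverywhere(const char* reg) {
    const auto contains = [reg](const auto& set) {
        for (const char* r : set) {
            if (std::strcmp(r, reg) == 0) {
                return true;
            }
        }
        return false;
    };
    // Holds for STATE=rbx, RESULT=ebp, PARAMETERS=r12, METHOD_ADDRESS=r14d,
    // and BRANCH_HOLDER=r15, i.e. every register pinned above.
    return contains(kSysVCalleeSaved) && contains(kWin64CalleeSaved);
}
```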
-MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
+MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d)
+ : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
@@ -53,32 +48,32 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
JITState state{};
state.maxwell3d = &maxwell3d;
state.registers = {};
- state.parameters = parameters.data();
- program(&state);
+ program(&state, parameters.data());
}
void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
const bool is_a_zero = opcode.src_a == 0;
const bool is_b_zero = opcode.src_b == 0;
const bool valid_operation = !is_a_zero && !is_b_zero;
- const bool is_move_operation = !is_a_zero && is_b_zero;
+ [[maybe_unused]] const bool is_move_operation = !is_a_zero && is_b_zero;
const bool has_zero_register = is_a_zero || is_b_zero;
+ const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry ||
+ opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow;
- Xbyak::Reg64 src_a;
+ Xbyak::Reg32 src_a;
Xbyak::Reg32 src_b;
- if (!optimizer.zero_reg_skip) {
- src_a = Compile_GetRegister(opcode.src_a, RESULT_64);
- src_b = Compile_GetRegister(opcode.src_b, ebx);
+ if (!optimizer.zero_reg_skip || no_zero_reg_skip) {
+ src_a = Compile_GetRegister(opcode.src_a, RESULT);
+ src_b = Compile_GetRegister(opcode.src_b, eax);
} else {
if (!is_a_zero) {
- src_a = Compile_GetRegister(opcode.src_a, RESULT_64);
+ src_a = Compile_GetRegister(opcode.src_a, RESULT);
}
if (!is_b_zero) {
- src_b = Compile_GetRegister(opcode.src_b, ebx);
+ src_b = Compile_GetRegister(opcode.src_b, eax);
}
}
- Xbyak::Label skip_carry{};
bool has_emitted = false;
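
The new no_zero_reg_skip carve-out exists because AddWithCarry and SubtractWithBorrow consume the carry flag: even when an operand names the always-zero register 0, the incoming carry still participates, so both operands must be materialized rather than skipped. A minimal model of the semantics:

```cpp
#include <cstdint>

struct AluState {
    uint32_t carry = 0;
};

// Even with b == 0 (register 0 is hardwired to zero), the result still
// depends on the previous carry, so the zero-register fast path is invalid.
uint32_t AddWithCarry(AluState& state, uint32_t a, uint32_t b) {
    const uint64_t wide = uint64_t{a} + b + state.carry;
    state.carry = static_cast<uint32_t>(wide >> 32); // carry-out for the next op
    return static_cast<uint32_t>(wide);
}
```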
@@ -190,7 +185,8 @@ void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) {
opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) {
if (next_opcode.has_value()) {
const auto next = *next_opcode;
- if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod) {
+ if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod &&
+ opcode.dst == next.dst) {
return;
}
}
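
The tightened peephole now drops a dummy AddImmediate only when the following MoveAndSetMethod overwrites the very same destination register; otherwise both writes are observable. A compact statement of the condition, with stand-in types:

```cpp
struct Op {
    unsigned dst;
    bool is_move_and_set_method;
};

// Skip the first write only if the next instruction clobbers the same dst.
bool CanSkipDummyAddImmediate(const Op& current, const Op* next) {
    return next != nullptr && next->is_move_and_set_method && current.dst == next->dst;
}
```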
@@ -244,10 +240,10 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
}
void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
- auto dst = Compile_GetRegister(opcode.src_a, eax);
- auto src = Compile_GetRegister(opcode.src_b, RESULT);
+ const auto dst = Compile_GetRegister(opcode.src_a, ecx);
+ const auto src = Compile_GetRegister(opcode.src_b, RESULT);
- shr(src, al);
+ shr(src, dst.cvt8());
if (opcode.bf_size != 0 && opcode.bf_size != 31) {
and_(src, opcode.GetBitfieldMask());
} else if (opcode.bf_size == 0) {
@@ -263,8 +259,8 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
}
void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
- auto dst = Compile_GetRegister(opcode.src_a, eax);
- auto src = Compile_GetRegister(opcode.src_b, RESULT);
+ const auto dst = Compile_GetRegister(opcode.src_a, ecx);
+ const auto src = Compile_GetRegister(opcode.src_b, RESULT);
if (opcode.bf_src_bit != 0) {
shr(src, opcode.bf_src_bit);
@@ -273,16 +269,9 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
if (opcode.bf_size != 31) {
and_(src, opcode.GetBitfieldMask());
}
- shl(src, al);
- Compile_ProcessResult(opcode.result_operation, opcode.dst);
-}
+ shl(src, dst.cvt8());
-static u32 Read(Engines::Maxwell3D* maxwell3d, u32 method) {
- return maxwell3d->GetRegisterValue(method);
-}
-
-static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
- maxwell3d->CallMethodFromMME(method_address.address, value);
+ Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
@@ -302,22 +291,34 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
sub(result, opcode.immediate * -1);
}
}
- Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);
- mov(Common::X64::ABI_PARAM1, qword[STATE]);
- mov(Common::X64::ABI_PARAM2, RESULT);
- Common::X64::CallFarFunction(*this, &Read);
- Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);
- mov(RESULT, Common::X64::ABI_RETURN.cvt32());
+
+ // Equivalent to Engines::Maxwell3D::GetRegisterValue:
+ if (optimizer.enable_asserts) {
+ Xbyak::Label pass_range_check;
+ cmp(RESULT, static_cast<u32>(Engines::Maxwell3D::Regs::NUM_REGS));
+ jb(pass_range_check);
+ int3();
+ L(pass_range_check);
+ }
+ mov(rax, qword[STATE]);
+ mov(RESULT,
+ dword[rax + offsetof(Engines::Maxwell3D, regs) +
+ offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]);
+
Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
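
Compile_Read now inlines the equivalent of Engines::Maxwell3D::GetRegisterValue as a single dword load, baking the operand offset into the addressing mode instead of spilling registers for a far call. A stand-alone model of the offset math; Regs and Maxwell3D here are stand-ins for the real engine types:

```cpp
#include <cstddef>
#include <cstdint>

namespace sketch {
struct Regs {
    uint32_t reg_array[0x1000]; // stand-in for Maxwell3D::Regs::reg_array
};
struct Maxwell3D {
    Regs regs; // stand-in; the real engine has more members around this
};

// Byte offset baked into the emitted addressing mode:
//   dword[rax + offsetof(Maxwell3D, regs) + offsetof(Regs, reg_array) + index * 4]
std::size_t RegisterByteOffset(uint32_t index) {
    return offsetof(Maxwell3D, regs) + offsetof(Regs, reg_array) +
           std::size_t{index} * sizeof(uint32_t);
}
} // namespace sketch
```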
+static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
+ maxwell3d->CallMethodFromMME(method_address.address, value);
+}
+
void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
- Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);
+ Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(Common::X64::ABI_PARAM1, qword[STATE]);
mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS);
mov(Common::X64::ABI_PARAM3, value);
Common::X64::CallFarFunction(*this, &Send);
- Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);
+ Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
Xbyak::Label dont_process{};
// Get increment
@@ -329,7 +330,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
and_(METHOD_ADDRESS, 0xfff);
shr(ecx, 12);
and_(ecx, 0x3f);
- lea(eax, ptr[rcx + METHOD_ADDRESS_64]);
+ lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]);
sal(ecx, 12);
or_(eax, ecx);
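
After a send, the packed method register advances its address by the auto-increment while keeping the increment field itself intact; the and/shr/lea/sal/or_ sequence above reduces to this sketch:

```cpp
#include <cstdint>

// Packed layout: method address in bits [0,12), auto-increment in [12,18).
uint32_t AdvanceMethodAddress(uint32_t packed) {
    const uint32_t address = packed & 0xfff;          // and_(METHOD_ADDRESS, 0xfff)
    const uint32_t increment = (packed >> 12) & 0x3f; // shr(ecx, 12); and_(ecx, 0x3f)
    return (increment << 12) | (address + increment); // lea + sal + or_
}
```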
@@ -418,22 +419,17 @@ void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() {
void MacroJITx64Impl::Compile() {
MICROPROFILE_SCOPE(MacroJitCompile);
- bool keep_executing = true;
labels.fill(Xbyak::Label());
- Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
+ Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
// JIT state
mov(STATE, Common::X64::ABI_PARAM1);
- mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 +
- static_cast<Xbyak::uint32>(offsetof(JITState, parameters))]);
- mov(REGISTERS, Common::X64::ABI_PARAM1);
- add(REGISTERS, static_cast<Xbyak::uint32>(offsetof(JITState, registers)));
+ mov(PARAMETERS, Common::X64::ABI_PARAM2);
xor_(RESULT, RESULT);
xor_(METHOD_ADDRESS, METHOD_ADDRESS);
- xor_(NEXT_PARAMETER, NEXT_PARAMETER);
xor_(BRANCH_HOLDER, BRANCH_HOLDER);
- mov(dword[REGISTERS + 4], Compile_FetchParameter());
+ mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter());
// Track GetRegister calls on the zero register and mark them as no-ops
optimizer.zero_reg_skip = true;
@@ -446,6 +442,9 @@ void MacroJITx64Impl::Compile() {
// one if our register isn't "dirty"
optimizer.optimize_for_method_move = true;
+ // Run-time assertions in JITted code (left disabled by default)
+ optimizer.enable_asserts = false;
+
// Check to see if we can skip emitting certain instructions
Optimizer_ScanFlags();
@@ -463,7 +462,7 @@ void MacroJITx64Impl::Compile() {
L(end_of_code);
- Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
+ Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
ret();
ready();
program = getCode<ProgramType>();
@@ -537,8 +536,8 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
}
Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() {
- mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]);
- inc(NEXT_PARAMETER);
+ mov(eax, dword[PARAMETERS]);
+ add(PARAMETERS, sizeof(u32));
return eax;
}
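
Dropping the NEXT_PARAMETER index register in favor of bumping PARAMETERS itself frees one more register for pinned state; in C++ terms the fetch is simply:

```cpp
#include <cstdint>

// Pointer-bump fetch: no separate index register needed.
uint32_t FetchParameter(const uint32_t*& parameters) {
    const uint32_t value = *parameters; // mov eax, dword[PARAMETERS]
    ++parameters;                       // add PARAMETERS, sizeof(u32)
    return value;
}
```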
@@ -547,41 +546,22 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
// Register 0 is always zero
xor_(dst, dst);
} else {
- mov(dst, dword[REGISTERS + index * sizeof(u32)]);
- }
-
- return dst;
-}
-
-Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) {
- if (index == 0) {
- // Register 0 is always zero
- xor_(dst, dst);
- } else {
- mov(dst, dword[REGISTERS + index * sizeof(u32)]);
+ mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]);
}
return dst;
}
-void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) {
- Xbyak::Label zero{}, end{};
- xor_(ecx, ecx);
- shr(dst, 32);
- setne(cl);
- mov(dword[STATE + offsetof(JITState, carry_flag)], ecx);
-}
-
void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
- auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) {
+ const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) {
// Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
// register.
if (reg == 0) {
return;
}
- mov(dword[REGISTERS + reg * sizeof(u32)], result);
+ mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result);
};
- auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); };
+ const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); };
switch (operation) {
case Macro::ResultOperation::IgnoreAndFetch:
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index 21ee157cf..a180e7428 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -55,8 +55,6 @@ private:
Xbyak::Reg32 Compile_FetchParameter();
Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
- Xbyak::Reg64 Compile_GetRegister(u32 index, Xbyak::Reg64 dst);
- void Compile_WriteCarry(Xbyak::Reg64 dst);
void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
void Compile_Send(Xbyak::Reg32 value);
@@ -67,11 +65,10 @@ private:
struct JITState {
Engines::Maxwell3D* maxwell3d{};
std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
- const u32* parameters{};
u32 carry_flag{};
};
static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
- using ProgramType = void (*)(JITState*);
+ using ProgramType = void (*)(JITState*, const u32*);
struct OptimizerState {
bool can_skip_carry{};
@@ -79,14 +76,15 @@ private:
bool zero_reg_skip{};
bool skip_dummy_addimmediate{};
bool optimize_for_method_move{};
+ bool enable_asserts{};
};
OptimizerState optimizer{};
std::optional<Macro::Opcode> next_opcode{};
ProgramType program{nullptr};
- std::array<Xbyak::Label, MAX_CODE_SIZE> labels{};
- std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip{};
+ std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
+ std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
Xbyak::Label end_of_code{};
bool is_delay_slot{};
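
With parameters moved out of JITState and into the second argument of ProgramType, invoking a compiled macro reduces to the following sketch; Run is an illustrative wrapper, not project API:

```cpp
#include <cstdint>
#include <vector>

struct JITState; // opaque here; holds maxwell3d, registers, carry_flag

using ProgramType = void (*)(JITState*, const uint32_t*);

// Mirrors MacroJITx64Impl::Execute: state via ABI_PARAM1, params via ABI_PARAM2.
void Run(ProgramType program, JITState& state, const std::vector<uint32_t>& params) {
    program(&state, params.data());
}
```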
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index dbee9f634..02cf53d15 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -4,7 +4,6 @@
#include "common/alignment.h"
#include "common/assert.h"
-#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/kernel/memory/page_table.h"
#include "core/hle/kernel/process.h"
@@ -15,122 +14,142 @@
namespace Tegra {
-MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
- : rasterizer{rasterizer}, system{system} {
- page_table.Resize(address_space_width, page_bits, false);
-
- // Initialize the map with a single free region covering the entire managed space.
- VirtualMemoryArea initial_vma;
- initial_vma.size = address_space_end;
- vma_map.emplace(initial_vma.base, initial_vma);
-
- UpdatePageTableForVMA(initial_vma);
-}
+MemoryManager::MemoryManager(Core::System& system_)
+ : system{system_}, page_table(page_table_size) {}
MemoryManager::~MemoryManager() = default;
-GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
- const u64 aligned_size{Common::AlignUp(size, page_size)};
- const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
-
- AllocateMemory(gpu_addr, 0, aligned_size);
+void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+ rasterizer = &rasterizer_;
+}
+GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
+ u64 remaining_size{size};
+ for (u64 offset{}; offset < size; offset += page_size) {
+ if (remaining_size < page_size) {
+ SetPageEntry(gpu_addr + offset, page_entry + offset, remaining_size);
+ } else {
+ SetPageEntry(gpu_addr + offset, page_entry + offset);
+ }
+ remaining_size -= page_size;
+ }
return gpu_addr;
}
-GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
- const u64 aligned_size{Common::AlignUp(size, page_size)};
-
- AllocateMemory(gpu_addr, 0, aligned_size);
+GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
+ return UpdateRange(gpu_addr, cpu_addr, size);
+}
- return gpu_addr;
+GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) {
+ return Map(cpu_addr, *FindFreeRange(size, align), size);
}
-GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
- const u64 aligned_size{Common::AlignUp(size, page_size)};
- const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
+void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
+ if (!size) {
+ return;
+ }
- MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr);
- ASSERT(
- system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess());
+ // Flush and invalidate through the GPU interface, to be asynchronous if possible.
+ system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size);
- return gpu_addr;
+ UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
}
-GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
- ASSERT((gpu_addr & page_mask) == 0);
+std::optional<GPUVAddr> MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) {
+ for (u64 offset{}; offset < size; offset += page_size) {
+ if (!GetPageEntry(gpu_addr + offset).IsUnmapped()) {
+ return std::nullopt;
+ }
+ }
- const u64 aligned_size{Common::AlignUp(size, page_size)};
+ return UpdateRange(gpu_addr, PageEntry::State::Allocated, size);
+}
- MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr);
- ASSERT(
- system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess());
- return gpu_addr;
+GPUVAddr MemoryManager::Allocate(std::size_t size, std::size_t align) {
+ return *AllocateFixed(*FindFreeRange(size, align), size);
}
-GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
- ASSERT((gpu_addr & page_mask) == 0);
+void MemoryManager::TryLockPage(PageEntry page_entry, std::size_t size) {
+ if (!page_entry.IsValid()) {
+ return;
+ }
- const u64 aligned_size{Common::AlignUp(size, page_size)};
- const auto cpu_addr = GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr);
+ ASSERT(system.CurrentProcess()
+ ->PageTable()
+ .LockForDeviceAddressSpace(page_entry.ToAddress(), size)
+ .IsSuccess());
+}
- // Flush and invalidate through the GPU interface, to be asynchronous if possible.
- system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size);
+void MemoryManager::TryUnlockPage(PageEntry page_entry, std::size_t size) {
+ if (!page_entry.IsValid()) {
+ return;
+ }
- UnmapRange(gpu_addr, aligned_size);
ASSERT(system.CurrentProcess()
->PageTable()
- .UnlockForDeviceAddressSpace(cpu_addr.value(), size)
+ .UnlockForDeviceAddressSpace(page_entry.ToAddress(), size)
.IsSuccess());
+}
- return gpu_addr;
+PageEntry MemoryManager::GetPageEntry(GPUVAddr gpu_addr) const {
+ return page_table[PageEntryIndex(gpu_addr)];
}
-GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) const {
- // Find the first Free VMA.
- const VMAHandle vma_handle{
- std::find_if(vma_map.begin(), vma_map.end(), [region_start, size](const auto& vma) {
- if (vma.second.type != VirtualMemoryArea::Type::Unmapped) {
- return false;
- }
+void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
+ // TODO(bunnei): We should lock/unlock device regions. This currently causes issues due to
+ // improper tracking, but should be fixed in the future.
- const VAddr vma_end{vma.second.base + vma.second.size};
- return vma_end > region_start && vma_end >= region_start + size;
- })};
+ //// Unlock the old page
+ // TryUnlockPage(page_table[PageEntryIndex(gpu_addr)], size);
- if (vma_handle == vma_map.end()) {
- return {};
- }
+ //// Lock the new page
+ // TryLockPage(page_entry, size);
- return std::max(region_start, vma_handle->second.base);
+ page_table[PageEntryIndex(gpu_addr)] = page_entry;
}
-bool MemoryManager::IsAddressValid(GPUVAddr addr) const {
- return (addr >> page_bits) < page_table.pointers.size();
-}
+std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align) const {
+ if (!align) {
+ align = page_size;
+ } else {
+ align = Common::AlignUp(align, page_size);
+ }
-std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) const {
- if (!IsAddressValid(addr)) {
- return {};
+ u64 available_size{};
+ GPUVAddr gpu_addr{address_space_start};
+ while (gpu_addr + available_size < address_space_size) {
+ if (GetPageEntry(gpu_addr + available_size).IsUnmapped()) {
+ available_size += page_size;
+
+ if (available_size >= size) {
+ return gpu_addr;
+ }
+ } else {
+ gpu_addr += available_size + page_size;
+ available_size = 0;
+
+ const auto remainder{gpu_addr % align};
+ if (remainder) {
+ gpu_addr = (gpu_addr - remainder) + align;
+ }
+ }
}
- const VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]};
- if (cpu_addr) {
- return cpu_addr + (addr & page_mask);
+ return std::nullopt;
+}
+
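
The scan above grows a window of unmapped pages and, on hitting a mapped page, restarts past it at the next aligned candidate base. A standalone model with simplified types:

```cpp
#include <cstdint>
#include <optional>

// Same semantics as FindFreeRange: IsUnmapped stands in for GetPageEntry().IsUnmapped().
std::optional<uint64_t> FindFree(uint64_t start, uint64_t end, uint64_t page,
                                 uint64_t align, uint64_t size,
                                 bool (*IsUnmapped)(uint64_t)) {
    uint64_t base = start;
    uint64_t available = 0;
    while (base + available < end) {
        if (IsUnmapped(base + available)) {
            available += page; // extend the free window
            if (available >= size) {
                return base;
            }
        } else {
            base += available + page; // skip past the mapped page
            available = 0;
            if (const uint64_t rem = base % align) {
                base += align - rem; // realign, as (base - rem) + align
            }
        }
    }
    return std::nullopt;
}
```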
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
+ const auto page_entry{GetPageEntry(gpu_addr)};
+ if (!page_entry.IsValid()) {
+ return std::nullopt;
}
- return {};
+ return page_entry.ToAddress() + (gpu_addr & page_mask);
}
template <typename T>
T MemoryManager::Read(GPUVAddr addr) const {
- if (!IsAddressValid(addr)) {
- return {};
- }
-
- const u8* page_pointer{GetPointer(addr)};
- if (page_pointer) {
+ if (auto page_pointer{GetPointer(addr)}; page_pointer) {
// NOTE: Avoid adding any extra logic to this fast-path block
T value;
std::memcpy(&value, page_pointer, sizeof(T));
@@ -144,12 +163,7 @@ T MemoryManager::Read(GPUVAddr addr) const {
template <typename T>
void MemoryManager::Write(GPUVAddr addr, T data) {
- if (!IsAddressValid(addr)) {
- return;
- }
-
- u8* page_pointer{GetPointer(addr)};
- if (page_pointer) {
+ if (auto page_pointer{GetPointer(addr)}; page_pointer) {
// NOTE: Avoid adding any extra logic to this fast-path block
std::memcpy(page_pointer, &data, sizeof(T));
return;
@@ -167,65 +181,49 @@ template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data);
template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data);
-u8* MemoryManager::GetPointer(GPUVAddr addr) {
- if (!IsAddressValid(addr)) {
+u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) {
+ if (!GetPageEntry(gpu_addr).IsValid()) {
return {};
}
- auto& memory = system.Memory();
-
- const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
-
- if (page_addr != 0) {
- return memory.GetPointer(page_addr + (addr & page_mask));
+ const auto address{GpuToCpuAddress(gpu_addr)};
+ if (!address) {
+ return {};
}
- LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
- return {};
+ return system.Memory().GetPointer(*address);
}
-const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
- if (!IsAddressValid(addr)) {
+const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
+ if (!GetPageEntry(gpu_addr).IsValid()) {
return {};
}
- const auto& memory = system.Memory();
-
- const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
-
- if (page_addr != 0) {
- return memory.GetPointer(page_addr + (addr & page_mask));
+ const auto address{GpuToCpuAddress(gpu_addr)};
+ if (!address) {
+ return {};
}
- LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
- return {};
-}
-
-bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const {
- const std::size_t inner_size = size - 1;
- const GPUVAddr end = start + inner_size;
- const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start));
- const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end));
- const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
- return range == inner_size;
+ return system.Memory().GetPointer(*address);
}
-void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const {
+void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const {
std::size_t remaining_size{size};
- std::size_t page_index{src_addr >> page_bits};
- std::size_t page_offset{src_addr & page_mask};
-
- auto& memory = system.Memory();
+ std::size_t page_index{gpu_src_addr >> page_bits};
+ std::size_t page_offset{gpu_src_addr & page_mask};
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
- const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
- // Flush must happen on the rasterizer interface, such that memory is always synchronous
- // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
- rasterizer.FlushRegion(src_addr, copy_amount);
- memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
+ if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
+ const auto src_addr{*page_addr + page_offset};
+
+ // Flush must happen on the rasterizer interface, such that memory is always synchronous
+ // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
+ rasterizer->FlushRegion(src_addr, copy_amount);
+ system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
+ }
page_index++;
page_offset = 0;
@@ -234,24 +232,23 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
}
}
-void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
+void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
const std::size_t size) const {
std::size_t remaining_size{size};
- std::size_t page_index{src_addr >> page_bits};
- std::size_t page_offset{src_addr & page_mask};
-
- auto& memory = system.Memory();
+ std::size_t page_index{gpu_src_addr >> page_bits};
+ std::size_t page_offset{gpu_src_addr & page_mask};
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
- const u8* page_pointer = page_table.pointers[page_index];
- if (page_pointer) {
- const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
- memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
+
+ if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
+ const auto src_addr{*page_addr + page_offset};
+ system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
} else {
std::memset(dest_buffer, 0, copy_amount);
}
+
page_index++;
page_offset = 0;
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
@@ -259,22 +256,23 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
}
}
-void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) {
+void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) {
std::size_t remaining_size{size};
- std::size_t page_index{dest_addr >> page_bits};
- std::size_t page_offset{dest_addr & page_mask};
-
- auto& memory = system.Memory();
+ std::size_t page_index{gpu_dest_addr >> page_bits};
+ std::size_t page_offset{gpu_dest_addr & page_mask};
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
- const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
- // Invalidate must happen on the rasterizer interface, such that memory is always
- // synchronous when it is written (even when in asynchronous GPU mode).
- rasterizer.InvalidateRegion(dest_addr, copy_amount);
- memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
+ if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
+ const auto dest_addr{*page_addr + page_offset};
+
+ // Invalidate must happen on the rasterizer interface, such that memory is always
+ // synchronous when it is written (even when in asynchronous GPU mode).
+ rasterizer->InvalidateRegion(dest_addr, copy_amount);
+ system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
+ }
page_index++;
page_offset = 0;
@@ -283,22 +281,21 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
}
}
-void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
- const std::size_t size) {
+void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
+ std::size_t size) {
std::size_t remaining_size{size};
- std::size_t page_index{dest_addr >> page_bits};
- std::size_t page_offset{dest_addr & page_mask};
-
- auto& memory = system.Memory();
+ std::size_t page_index{gpu_dest_addr >> page_bits};
+ std::size_t page_offset{gpu_dest_addr & page_mask};
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
- u8* page_pointer = page_table.pointers[page_index];
- if (page_pointer) {
- const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
- memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
+
+ if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
+ const auto dest_addr{*page_addr + page_offset};
+ system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
}
+
page_index++;
page_offset = 0;
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
@@ -306,271 +303,26 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
}
}
-void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
+void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) {
std::vector<u8> tmp_buffer(size);
- ReadBlock(src_addr, tmp_buffer.data(), size);
- WriteBlock(dest_addr, tmp_buffer.data(), size);
+ ReadBlock(gpu_src_addr, tmp_buffer.data(), size);
+ WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);
}
-void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
+void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
+ std::size_t size) {
std::vector<u8> tmp_buffer(size);
- ReadBlockUnsafe(src_addr, tmp_buffer.data(), size);
- WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
-}
-
-bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
- const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits];
- const std::size_t page = (addr & Core::Memory::PAGE_MASK) + size;
- return page <= Core::Memory::PAGE_SIZE;
+ ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size);
+ WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size);
}
-void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
- VAddr backing_addr) {
- LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
- (base + size) * page_size);
-
- const VAddr end{base + size};
- ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
- base + page_table.pointers.size());
-
- if (memory == nullptr) {
- while (base != end) {
- page_table.pointers[base] = nullptr;
- page_table.backing_addr[base] = 0;
-
- base += 1;
- }
- } else {
- while (base != end) {
- page_table.pointers[base] = memory;
- page_table.backing_addr[base] = backing_addr;
-
- base += 1;
- memory += page_size;
- backing_addr += page_size;
- }
- }
-}
-
-void MemoryManager::MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr) {
- ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
- ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
- MapPages(base / page_size, size / page_size, target, Common::PageType::Memory, backing_addr);
-}
-
-void MemoryManager::UnmapRegion(GPUVAddr base, u64 size) {
- ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
- ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
- MapPages(base / page_size, size / page_size, nullptr, Common::PageType::Unmapped);
-}
-
-bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
- ASSERT(base + size == next.base);
- if (type != next.type) {
- return {};
- }
- if (type == VirtualMemoryArea::Type::Allocated && (offset + size != next.offset)) {
- return {};
- }
- if (type == VirtualMemoryArea::Type::Mapped && backing_memory + size != next.backing_memory) {
- return {};
- }
- return true;
-}
-
-MemoryManager::VMAHandle MemoryManager::FindVMA(GPUVAddr target) const {
- if (target >= address_space_end) {
- return vma_map.end();
- } else {
- return std::prev(vma_map.upper_bound(target));
- }
-}
-
-MemoryManager::VMAIter MemoryManager::Allocate(VMAIter vma_handle) {
- VirtualMemoryArea& vma{vma_handle->second};
-
- vma.type = VirtualMemoryArea::Type::Allocated;
- vma.backing_addr = 0;
- vma.backing_memory = {};
- UpdatePageTableForVMA(vma);
-
- return MergeAdjacent(vma_handle);
-}
-
-MemoryManager::VMAHandle MemoryManager::AllocateMemory(GPUVAddr target, std::size_t offset,
- u64 size) {
-
- // This is the appropriately sized VMA that will turn into our allocation.
- VMAIter vma_handle{CarveVMA(target, size)};
- VirtualMemoryArea& vma{vma_handle->second};
-
- ASSERT(vma.size == size);
-
- vma.offset = offset;
-
- return Allocate(vma_handle);
-}
-
-MemoryManager::VMAHandle MemoryManager::MapBackingMemory(GPUVAddr target, u8* memory, u64 size,
- VAddr backing_addr) {
- // This is the appropriately sized VMA that will turn into our allocation.
- VMAIter vma_handle{CarveVMA(target, size)};
- VirtualMemoryArea& vma{vma_handle->second};
-
- ASSERT(vma.size == size);
-
- vma.type = VirtualMemoryArea::Type::Mapped;
- vma.backing_memory = memory;
- vma.backing_addr = backing_addr;
- UpdatePageTableForVMA(vma);
-
- return MergeAdjacent(vma_handle);
-}
-
-void MemoryManager::UnmapRange(GPUVAddr target, u64 size) {
- VMAIter vma{CarveVMARange(target, size)};
- const VAddr target_end{target + size};
- const VMAIter end{vma_map.end()};
-
- // The comparison against the end of the range must be done using addresses since VMAs can be
- // merged during this process, causing invalidation of the iterators.
- while (vma != end && vma->second.base < target_end) {
- // Unmapped ranges return to allocated state and can be reused
- // This behavior is used by Super Mario Odyssey, Sonic Forces, and likely other games
- vma = std::next(Allocate(vma));
- }
-
- ASSERT(FindVMA(target)->second.size >= size);
-}
-
-MemoryManager::VMAIter MemoryManager::StripIterConstness(const VMAHandle& iter) {
- // This uses a neat C++ trick to convert a const_iterator to a regular iterator, given
- // non-const access to its container.
- return vma_map.erase(iter, iter); // Erases an empty range of elements
-}
-
-MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
- ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
- ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: 0x{:016X}", base);
-
- VMAIter vma_handle{StripIterConstness(FindVMA(base))};
- if (vma_handle == vma_map.end()) {
- // Target address is outside the managed range
- return {};
- }
-
- const VirtualMemoryArea& vma{vma_handle->second};
- if (vma.type == VirtualMemoryArea::Type::Mapped) {
- // Region is already allocated
- return vma_handle;
- }
-
- const VAddr start_in_vma{base - vma.base};
- const VAddr end_in_vma{start_in_vma + size};
-
- ASSERT_MSG(end_in_vma <= vma.size, "region size 0x{:016X} is less than required size 0x{:016X}",
- vma.size, end_in_vma);
-
- if (end_in_vma < vma.size) {
- // Split VMA at the end of the allocated region
- SplitVMA(vma_handle, end_in_vma);
- }
- if (start_in_vma != 0) {
- // Split VMA at the start of the allocated region
- vma_handle = SplitVMA(vma_handle, start_in_vma);
- }
-
- return vma_handle;
-}
-
-MemoryManager::VMAIter MemoryManager::CarveVMARange(GPUVAddr target, u64 size) {
- ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
- ASSERT_MSG((target & page_mask) == 0, "non-page aligned base: 0x{:016X}", target);
-
- const VAddr target_end{target + size};
- ASSERT(target_end >= target);
- ASSERT(size > 0);
-
- VMAIter begin_vma{StripIterConstness(FindVMA(target))};
- const VMAIter i_end{vma_map.lower_bound(target_end)};
- if (std::any_of(begin_vma, i_end, [](const auto& entry) {
- return entry.second.type == VirtualMemoryArea::Type::Unmapped;
- })) {
- return {};
- }
-
- if (target != begin_vma->second.base) {
- begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base);
- }
-
- VMAIter end_vma{StripIterConstness(FindVMA(target_end))};
- if (end_vma != vma_map.end() && target_end != end_vma->second.base) {
- end_vma = SplitVMA(end_vma, target_end - end_vma->second.base);
- }
-
- return begin_vma;
-}
-
-MemoryManager::VMAIter MemoryManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
- VirtualMemoryArea& old_vma{vma_handle->second};
- VirtualMemoryArea new_vma{old_vma}; // Make a copy of the VMA
-
- // For now, don't allow no-op VMA splits (trying to split at a boundary) because it's probably
- // a bug. This restriction might be removed later.
- ASSERT(offset_in_vma < old_vma.size);
- ASSERT(offset_in_vma > 0);
-
- old_vma.size = offset_in_vma;
- new_vma.base += offset_in_vma;
- new_vma.size -= offset_in_vma;
-
- switch (new_vma.type) {
- case VirtualMemoryArea::Type::Unmapped:
- break;
- case VirtualMemoryArea::Type::Allocated:
- new_vma.offset += offset_in_vma;
- break;
- case VirtualMemoryArea::Type::Mapped:
- new_vma.backing_memory += offset_in_vma;
- break;
- }
-
- ASSERT(old_vma.CanBeMergedWith(new_vma));
-
- return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma);
-}
-
-MemoryManager::VMAIter MemoryManager::MergeAdjacent(VMAIter iter) {
- const VMAIter next_vma{std::next(iter)};
- if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
- iter->second.size += next_vma->second.size;
- vma_map.erase(next_vma);
- }
-
- if (iter != vma_map.begin()) {
- VMAIter prev_vma{std::prev(iter)};
- if (prev_vma->second.CanBeMergedWith(iter->second)) {
- prev_vma->second.size += iter->second.size;
- vma_map.erase(iter);
- iter = prev_vma;
- }
- }
-
- return iter;
-}
-
-void MemoryManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
- switch (vma.type) {
- case VirtualMemoryArea::Type::Unmapped:
- UnmapRegion(vma.base, vma.size);
- break;
- case VirtualMemoryArea::Type::Allocated:
- MapMemoryRegion(vma.base, vma.size, nullptr, vma.backing_addr);
- break;
- case VirtualMemoryArea::Type::Mapped:
- MapMemoryRegion(vma.base, vma.size, vma.backing_memory, vma.backing_addr);
- break;
+bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
+ const auto cpu_addr{GpuToCpuAddress(gpu_addr)};
+ if (!cpu_addr) {
+ return false;
}
+ const std::size_t page{(*cpu_addr & Core::Memory::PAGE_MASK) + size};
+ return page <= Core::Memory::PAGE_SIZE;
}
} // namespace Tegra
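
The whole VMA machinery is replaced by a flat page table of 32-bit entries, so translation is now one index plus one shift. A condensed, self-contained model of the new GpuToCpuAddress path, with the constants copied from memory_manager.h:

```cpp
#include <cstdint>
#include <optional>
#include <vector>

constexpr uint64_t page_bits = 16;                      // 64 KiB GPU pages
constexpr uint64_t page_mask = (1ULL << page_bits) - 1;
constexpr uint64_t table_mask = (1ULL << 24) - 1;       // page_table_mask

std::optional<uint64_t> GpuToCpu(const std::vector<uint32_t>& table, uint64_t gpu_addr) {
    const uint32_t entry = table[(gpu_addr >> page_bits) & table_mask];
    if (entry >= 0xFFFFFFFEu) { // Unmapped (-1) or Allocated (-2): no backing
        return std::nullopt;
    }
    // PageEntry::ToAddress shifts by 12; the page offset is re-added.
    return (uint64_t{entry} << 12) + (gpu_addr & page_mask);
}
```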
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 0ddd52d5a..53c8d122a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -6,9 +6,9 @@
#include <map>
#include <optional>
+#include <vector>
#include "common/common_types.h"
-#include "common/page_table.h"
namespace VideoCore {
class RasterizerInterface;
@@ -20,58 +20,70 @@ class System;
namespace Tegra {
-/**
- * Represents a VMA in an address space. A VMA is a contiguous region of virtual addressing space
- * with homogeneous attributes across its extents. In this particular implementation each VMA is
- * also backed by a single host memory allocation.
- */
-struct VirtualMemoryArea {
- enum class Type : u8 {
- Unmapped,
- Allocated,
- Mapped,
+class PageEntry final {
+public:
+ enum class State : u32 {
+ Unmapped = static_cast<u32>(-1),
+ Allocated = static_cast<u32>(-2),
};
- /// Virtual base address of the region.
- GPUVAddr base{};
- /// Size of the region.
- u64 size{};
- /// Memory area mapping type.
- Type type{Type::Unmapped};
- /// CPU memory mapped address corresponding to this memory area.
- VAddr backing_addr{};
- /// Offset into the backing_memory the mapping starts from.
- std::size_t offset{};
- /// Pointer backing this VMA.
- u8* backing_memory{};
-
- /// Tests if this area can be merged to the right with `next`.
- bool CanBeMergedWith(const VirtualMemoryArea& next) const;
+ constexpr PageEntry() = default;
+ constexpr PageEntry(State state) : state{state} {}
+ constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {}
+
+ [[nodiscard]] constexpr bool IsUnmapped() const {
+ return state == State::Unmapped;
+ }
+
+ [[nodiscard]] constexpr bool IsAllocated() const {
+ return state == State::Allocated;
+ }
+
+ [[nodiscard]] constexpr bool IsValid() const {
+ return !IsUnmapped() && !IsAllocated();
+ }
+
+ [[nodiscard]] constexpr VAddr ToAddress() const {
+ if (!IsValid()) {
+ return {};
+ }
+
+ return static_cast<VAddr>(state) << ShiftBits;
+ }
+
+ [[nodiscard]] constexpr PageEntry operator+(u64 offset) const {
+ // If this is a reserved value, offsets do not apply
+ if (!IsValid()) {
+ return *this;
+ }
+ return PageEntry{(static_cast<VAddr>(state) << ShiftBits) + offset};
+ }
+
+private:
+ static constexpr std::size_t ShiftBits{12};
+
+ State state{State::Unmapped};
};
+static_assert(sizeof(PageEntry) == 4, "PageEntry must be 4 bytes");
class MemoryManager final {
public:
- explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
+ explicit MemoryManager(Core::System& system);
~MemoryManager();
- GPUVAddr AllocateSpace(u64 size, u64 align);
- GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
- GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
- GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size);
- GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size);
- std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
+ /// Binds a renderer to the memory manager.
+ void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+
+ [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
template <typename T>
- T Read(GPUVAddr addr) const;
+ [[nodiscard]] T Read(GPUVAddr addr) const;
template <typename T>
void Write(GPUVAddr addr, T data);
- u8* GetPointer(GPUVAddr addr);
- const u8* GetPointer(GPUVAddr addr) const;
-
- /// Returns true if the block is continuous in host memory, false otherwise
- bool IsBlockContinuous(GPUVAddr start, std::size_t size) const;
+ [[nodiscard]] u8* GetPointer(GPUVAddr addr);
+ [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const;
/**
* ReadBlock and WriteBlock are full read and write operations over virtual
@@ -79,9 +91,9 @@ public:
* in the Host Memory counterpart. Note: These functions cause Host GPU Memory
* Flushes and Invalidations, respectively, for each operation.
*/
- void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
- void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
- void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
+ void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
+ void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
+ void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
/**
* ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
@@ -93,97 +105,48 @@ public:
* WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
* being flushed.
*/
- void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
- void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
- void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
+ void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
+ void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
+ void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
/**
- * IsGranularRange checks if a gpu region can be simply read with a pointer
+ * IsGranularRange checks if a GPU region can be simply read with a pointer.
*/
- bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size);
-
-private:
- using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
- using VMAHandle = VMAMap::const_iterator;
- using VMAIter = VMAMap::iterator;
-
- bool IsAddressValid(GPUVAddr addr) const;
- void MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
- VAddr backing_addr = 0);
- void MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr);
- void UnmapRegion(GPUVAddr base, u64 size);
+ [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
- /// Finds the VMA in which the given address is included in, or `vma_map.end()`.
- VMAHandle FindVMA(GPUVAddr target) const;
-
- VMAHandle AllocateMemory(GPUVAddr target, std::size_t offset, u64 size);
-
- /**
- * Maps an unmanaged host memory pointer at a given address.
- *
- * @param target The guest address to start the mapping at.
- * @param memory The memory to be mapped.
- * @param size Size of the mapping in bytes.
- * @param backing_addr The base address of the range to back this mapping.
- */
- VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
+ [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
+ [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
+ [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
+ [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
+ void Unmap(GPUVAddr gpu_addr, std::size_t size);
- /// Unmaps a range of addresses, splitting VMAs as necessary.
- void UnmapRange(GPUVAddr target, u64 size);
-
- /// Converts a VMAHandle to a mutable VMAIter.
- VMAIter StripIterConstness(const VMAHandle& iter);
-
- /// Marks as the specified VMA as allocated.
- VMAIter Allocate(VMAIter vma);
-
- /**
- * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing
- * the appropriate error checking.
- */
- VMAIter CarveVMA(GPUVAddr base, u64 size);
-
- /**
- * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each
- * end of the range.
- */
- VMAIter CarveVMARange(GPUVAddr base, u64 size);
-
- /**
- * Splits a VMA in two, at the specified offset.
- * @returns the right side of the split, with the original iterator becoming the left side.
- */
- VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma);
-
- /**
- * Checks for and merges the specified VMA with adjacent ones if possible.
- * @returns the merged VMA or the original if no merging was possible.
- */
- VMAIter MergeAdjacent(VMAIter vma);
+private:
+ [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
+ void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
+ GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size);
+ [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const;
- /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
- void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
+ void TryLockPage(PageEntry page_entry, std::size_t size);
+ void TryUnlockPage(PageEntry page_entry, std::size_t size);
- /// Finds a free (unmapped region) of the specified size starting at the specified address.
- GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size) const;
+ [[nodiscard]] static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) {
+ return (gpu_addr >> page_bits) & page_table_mask;
+ }
-private:
+ static constexpr u64 address_space_size = 1ULL << 40;
+ static constexpr u64 address_space_start = 1ULL << 32;
static constexpr u64 page_bits{16};
static constexpr u64 page_size{1 << page_bits};
static constexpr u64 page_mask{page_size - 1};
+ static constexpr u64 page_table_bits{24};
+ static constexpr u64 page_table_size{1 << page_table_bits};
+ static constexpr u64 page_table_mask{page_table_size - 1};
- /// Address space in bits, according to Tegra X1 TRM
- static constexpr u32 address_space_width{40};
- /// Start address for mapping, this is fairly arbitrary but must be non-zero.
- static constexpr GPUVAddr address_space_base{0x100000};
- /// End of address space, based on address space in bits.
- static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
+ Core::System& system;
- Common::PageTable page_table;
- VMAMap vma_map;
- VideoCore::RasterizerInterface& rasterizer;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
- Core::System& system;
+ std::vector<PageEntry> page_table;
};
} // namespace Tegra
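
Two quick checks on the arithmetic of the new layout, using the constants declared above:

```cpp
#include <cstdint>

constexpr uint64_t entries = 1ULL << 24; // page_table_size
constexpr uint64_t bytes = entries * sizeof(uint32_t);
static_assert(bytes == 64ULL << 20, "flat table of u32 entries costs 64 MiB");

// A u32 entry stores cpu_addr >> 12; the largest non-reserved value
// (UINT32_MAX - 2) therefore still fits inside a 44-bit host address range.
static_assert((uint64_t{0xFFFFFFFD} << 12) < (1ULL << 44));
```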
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 836b25c1d..9da9fb4ff 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -41,146 +41,168 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
}
static constexpr ConversionArray morton_to_linear_fns = {
- MortonCopy<true, PixelFormat::ABGR8U>,
- MortonCopy<true, PixelFormat::ABGR8S>,
- MortonCopy<true, PixelFormat::ABGR8UI>,
- MortonCopy<true, PixelFormat::B5G6R5U>,
- MortonCopy<true, PixelFormat::A2B10G10R10U>,
- MortonCopy<true, PixelFormat::A1B5G5R5U>,
- MortonCopy<true, PixelFormat::R8U>,
- MortonCopy<true, PixelFormat::R8UI>,
- MortonCopy<true, PixelFormat::RGBA16F>,
- MortonCopy<true, PixelFormat::RGBA16U>,
- MortonCopy<true, PixelFormat::RGBA16S>,
- MortonCopy<true, PixelFormat::RGBA16UI>,
- MortonCopy<true, PixelFormat::R11FG11FB10F>,
- MortonCopy<true, PixelFormat::RGBA32UI>,
- MortonCopy<true, PixelFormat::DXT1>,
- MortonCopy<true, PixelFormat::DXT23>,
- MortonCopy<true, PixelFormat::DXT45>,
- MortonCopy<true, PixelFormat::DXN1>,
- MortonCopy<true, PixelFormat::DXN2UNORM>,
- MortonCopy<true, PixelFormat::DXN2SNORM>,
- MortonCopy<true, PixelFormat::BC7U>,
- MortonCopy<true, PixelFormat::BC6H_UF16>,
- MortonCopy<true, PixelFormat::BC6H_SF16>,
- MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
- MortonCopy<true, PixelFormat::BGRA8>,
- MortonCopy<true, PixelFormat::RGBA32F>,
- MortonCopy<true, PixelFormat::RG32F>,
- MortonCopy<true, PixelFormat::R32F>,
- MortonCopy<true, PixelFormat::R16F>,
- MortonCopy<true, PixelFormat::R16U>,
- MortonCopy<true, PixelFormat::R16S>,
- MortonCopy<true, PixelFormat::R16UI>,
- MortonCopy<true, PixelFormat::R16I>,
- MortonCopy<true, PixelFormat::RG16>,
- MortonCopy<true, PixelFormat::RG16F>,
- MortonCopy<true, PixelFormat::RG16UI>,
- MortonCopy<true, PixelFormat::RG16I>,
- MortonCopy<true, PixelFormat::RG16S>,
- MortonCopy<true, PixelFormat::RGB32F>,
- MortonCopy<true, PixelFormat::RGBA8_SRGB>,
- MortonCopy<true, PixelFormat::RG8U>,
- MortonCopy<true, PixelFormat::RG8S>,
- MortonCopy<true, PixelFormat::RG8UI>,
- MortonCopy<true, PixelFormat::RG32UI>,
- MortonCopy<true, PixelFormat::RGBX16F>,
- MortonCopy<true, PixelFormat::R32UI>,
- MortonCopy<true, PixelFormat::R32I>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
- MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
- MortonCopy<true, PixelFormat::BGRA8_SRGB>,
- MortonCopy<true, PixelFormat::DXT1_SRGB>,
- MortonCopy<true, PixelFormat::DXT23_SRGB>,
- MortonCopy<true, PixelFormat::DXT45_SRGB>,
- MortonCopy<true, PixelFormat::BC7U_SRGB>,
- MortonCopy<true, PixelFormat::R4G4B4A4U>,
+ MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>,
+ MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>,
+ MortonCopy<true, PixelFormat::A8B8G8R8_SINT>,
+ MortonCopy<true, PixelFormat::A8B8G8R8_UINT>,
+ MortonCopy<true, PixelFormat::R5G6B5_UNORM>,
+ MortonCopy<true, PixelFormat::B5G6R5_UNORM>,
+ MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>,
+ MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>,
+ MortonCopy<true, PixelFormat::A2B10G10R10_UINT>,
+ MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>,
+ MortonCopy<true, PixelFormat::R8_UNORM>,
+ MortonCopy<true, PixelFormat::R8_SNORM>,
+ MortonCopy<true, PixelFormat::R8_SINT>,
+ MortonCopy<true, PixelFormat::R8_UINT>,
+ MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>,
+ MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>,
+ MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>,
+ MortonCopy<true, PixelFormat::R16G16B16A16_SINT>,
+ MortonCopy<true, PixelFormat::R16G16B16A16_UINT>,
+ MortonCopy<true, PixelFormat::B10G11R11_FLOAT>,
+ MortonCopy<true, PixelFormat::R32G32B32A32_UINT>,
+ MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>,
+ MortonCopy<true, PixelFormat::BC2_UNORM>,
+ MortonCopy<true, PixelFormat::BC3_UNORM>,
+ MortonCopy<true, PixelFormat::BC4_UNORM>,
+ MortonCopy<true, PixelFormat::BC4_SNORM>,
+ MortonCopy<true, PixelFormat::BC5_UNORM>,
+ MortonCopy<true, PixelFormat::BC5_SNORM>,
+ MortonCopy<true, PixelFormat::BC7_UNORM>,
+ MortonCopy<true, PixelFormat::BC6H_UFLOAT>,
+ MortonCopy<true, PixelFormat::BC6H_SFLOAT>,
+ MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>,
+ MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>,
+ MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>,
+ MortonCopy<true, PixelFormat::R32G32B32A32_SINT>,
+ MortonCopy<true, PixelFormat::R32G32_FLOAT>,
+ MortonCopy<true, PixelFormat::R32G32_SINT>,
+ MortonCopy<true, PixelFormat::R32_FLOAT>,
+ MortonCopy<true, PixelFormat::R16_FLOAT>,
+ MortonCopy<true, PixelFormat::R16_UNORM>,
+ MortonCopy<true, PixelFormat::R16_SNORM>,
+ MortonCopy<true, PixelFormat::R16_UINT>,
+ MortonCopy<true, PixelFormat::R16_SINT>,
+ MortonCopy<true, PixelFormat::R16G16_UNORM>,
+ MortonCopy<true, PixelFormat::R16G16_FLOAT>,
+ MortonCopy<true, PixelFormat::R16G16_UINT>,
+ MortonCopy<true, PixelFormat::R16G16_SINT>,
+ MortonCopy<true, PixelFormat::R16G16_SNORM>,
+ MortonCopy<true, PixelFormat::R32G32B32_FLOAT>,
+ MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>,
+ MortonCopy<true, PixelFormat::R8G8_UNORM>,
+ MortonCopy<true, PixelFormat::R8G8_SNORM>,
+ MortonCopy<true, PixelFormat::R8G8_SINT>,
+ MortonCopy<true, PixelFormat::R8G8_UINT>,
+ MortonCopy<true, PixelFormat::R32G32_UINT>,
+ MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>,
+ MortonCopy<true, PixelFormat::R32_UINT>,
+ MortonCopy<true, PixelFormat::R32_SINT>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>,
+ MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>,
+ MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>,
+ MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>,
+ MortonCopy<true, PixelFormat::BC2_SRGB>,
+ MortonCopy<true, PixelFormat::BC3_SRGB>,
+ MortonCopy<true, PixelFormat::BC7_SRGB>,
+ MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
+ MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
+ MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_6X6>,
+ MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_10X10>,
+ MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_12X12>,
+ MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X6>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_6X5>,
+ MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>,
- MortonCopy<true, PixelFormat::E5B9G9R9F>,
- MortonCopy<true, PixelFormat::Z32F>,
- MortonCopy<true, PixelFormat::Z16>,
- MortonCopy<true, PixelFormat::Z24S8>,
- MortonCopy<true, PixelFormat::S8Z24>,
- MortonCopy<true, PixelFormat::Z32FS8>,
+ MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>,
+ MortonCopy<true, PixelFormat::D32_FLOAT>,
+ MortonCopy<true, PixelFormat::D16_UNORM>,
+ MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>,
+ MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>,
+ MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>,
};
static constexpr ConversionArray linear_to_morton_fns = {
- MortonCopy<false, PixelFormat::ABGR8U>,
- MortonCopy<false, PixelFormat::ABGR8S>,
- MortonCopy<false, PixelFormat::ABGR8UI>,
- MortonCopy<false, PixelFormat::B5G6R5U>,
- MortonCopy<false, PixelFormat::A2B10G10R10U>,
- MortonCopy<false, PixelFormat::A1B5G5R5U>,
- MortonCopy<false, PixelFormat::R8U>,
- MortonCopy<false, PixelFormat::R8UI>,
- MortonCopy<false, PixelFormat::RGBA16F>,
- MortonCopy<false, PixelFormat::RGBA16S>,
- MortonCopy<false, PixelFormat::RGBA16U>,
- MortonCopy<false, PixelFormat::RGBA16UI>,
- MortonCopy<false, PixelFormat::R11FG11FB10F>,
- MortonCopy<false, PixelFormat::RGBA32UI>,
- MortonCopy<false, PixelFormat::DXT1>,
- MortonCopy<false, PixelFormat::DXT23>,
- MortonCopy<false, PixelFormat::DXT45>,
- MortonCopy<false, PixelFormat::DXN1>,
- MortonCopy<false, PixelFormat::DXN2UNORM>,
- MortonCopy<false, PixelFormat::DXN2SNORM>,
- MortonCopy<false, PixelFormat::BC7U>,
- MortonCopy<false, PixelFormat::BC6H_UF16>,
- MortonCopy<false, PixelFormat::BC6H_SF16>,
+ MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>,
+ MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>,
+ MortonCopy<false, PixelFormat::A8B8G8R8_SINT>,
+ MortonCopy<false, PixelFormat::A8B8G8R8_UINT>,
+ MortonCopy<false, PixelFormat::R5G6B5_UNORM>,
+ MortonCopy<false, PixelFormat::B5G6R5_UNORM>,
+ MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>,
+ MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>,
+ MortonCopy<false, PixelFormat::A2B10G10R10_UINT>,
+ MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>,
+ MortonCopy<false, PixelFormat::R8_UNORM>,
+ MortonCopy<false, PixelFormat::R8_SNORM>,
+ MortonCopy<false, PixelFormat::R8_SINT>,
+ MortonCopy<false, PixelFormat::R8_UINT>,
+ MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>,
+ MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>,
+ MortonCopy<false, PixelFormat::R16G16B16A16_SINT>,
+ MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>,
+ MortonCopy<false, PixelFormat::R16G16B16A16_UINT>,
+ MortonCopy<false, PixelFormat::B10G11R11_FLOAT>,
+ MortonCopy<false, PixelFormat::R32G32B32A32_UINT>,
+ MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>,
+ MortonCopy<false, PixelFormat::BC2_UNORM>,
+ MortonCopy<false, PixelFormat::BC3_UNORM>,
+ MortonCopy<false, PixelFormat::BC4_UNORM>,
+ MortonCopy<false, PixelFormat::BC4_SNORM>,
+ MortonCopy<false, PixelFormat::BC5_UNORM>,
+ MortonCopy<false, PixelFormat::BC5_SNORM>,
+ MortonCopy<false, PixelFormat::BC7_UNORM>,
+ MortonCopy<false, PixelFormat::BC6H_UFLOAT>,
+ MortonCopy<false, PixelFormat::BC6H_SFLOAT>,
// TODO(Subv): Swizzling ASTC formats is not supported
nullptr,
- MortonCopy<false, PixelFormat::BGRA8>,
- MortonCopy<false, PixelFormat::RGBA32F>,
- MortonCopy<false, PixelFormat::RG32F>,
- MortonCopy<false, PixelFormat::R32F>,
- MortonCopy<false, PixelFormat::R16F>,
- MortonCopy<false, PixelFormat::R16U>,
- MortonCopy<false, PixelFormat::R16S>,
- MortonCopy<false, PixelFormat::R16UI>,
- MortonCopy<false, PixelFormat::R16I>,
- MortonCopy<false, PixelFormat::RG16>,
- MortonCopy<false, PixelFormat::RG16F>,
- MortonCopy<false, PixelFormat::RG16UI>,
- MortonCopy<false, PixelFormat::RG16I>,
- MortonCopy<false, PixelFormat::RG16S>,
- MortonCopy<false, PixelFormat::RGB32F>,
- MortonCopy<false, PixelFormat::RGBA8_SRGB>,
- MortonCopy<false, PixelFormat::RG8U>,
- MortonCopy<false, PixelFormat::RG8S>,
- MortonCopy<false, PixelFormat::RG8UI>,
- MortonCopy<false, PixelFormat::RG32UI>,
- MortonCopy<false, PixelFormat::RGBX16F>,
- MortonCopy<false, PixelFormat::R32UI>,
- MortonCopy<false, PixelFormat::R32I>,
+ MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>,
+ MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>,
+ MortonCopy<false, PixelFormat::R32G32B32A32_SINT>,
+ MortonCopy<false, PixelFormat::R32G32_FLOAT>,
+ MortonCopy<false, PixelFormat::R32G32_SINT>,
+ MortonCopy<false, PixelFormat::R32_FLOAT>,
+ MortonCopy<false, PixelFormat::R16_FLOAT>,
+ MortonCopy<false, PixelFormat::R16_UNORM>,
+ MortonCopy<false, PixelFormat::R16_SNORM>,
+ MortonCopy<false, PixelFormat::R16_UINT>,
+ MortonCopy<false, PixelFormat::R16_SINT>,
+ MortonCopy<false, PixelFormat::R16G16_UNORM>,
+ MortonCopy<false, PixelFormat::R16G16_FLOAT>,
+ MortonCopy<false, PixelFormat::R16G16_UINT>,
+ MortonCopy<false, PixelFormat::R16G16_SINT>,
+ MortonCopy<false, PixelFormat::R16G16_SNORM>,
+ MortonCopy<false, PixelFormat::R32G32B32_FLOAT>,
+ MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>,
+ MortonCopy<false, PixelFormat::R8G8_UNORM>,
+ MortonCopy<false, PixelFormat::R8G8_SNORM>,
+ MortonCopy<false, PixelFormat::R8G8_SINT>,
+ MortonCopy<false, PixelFormat::R8G8_UINT>,
+ MortonCopy<false, PixelFormat::R32G32_UINT>,
+ MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>,
+ MortonCopy<false, PixelFormat::R32_UINT>,
+ MortonCopy<false, PixelFormat::R32_SINT>,
nullptr,
nullptr,
nullptr,
- MortonCopy<false, PixelFormat::BGRA8_SRGB>,
- MortonCopy<false, PixelFormat::DXT1_SRGB>,
- MortonCopy<false, PixelFormat::DXT23_SRGB>,
- MortonCopy<false, PixelFormat::DXT45_SRGB>,
- MortonCopy<false, PixelFormat::BC7U_SRGB>,
- MortonCopy<false, PixelFormat::R4G4B4A4U>,
+ MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>,
+ MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>,
+ MortonCopy<false, PixelFormat::BC2_SRGB>,
+ MortonCopy<false, PixelFormat::BC3_SRGB>,
+ MortonCopy<false, PixelFormat::BC7_SRGB>,
+ MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>,
nullptr,
nullptr,
nullptr,
@@ -199,12 +221,12 @@ static constexpr ConversionArray linear_to_morton_fns = {
nullptr,
nullptr,
nullptr,
- MortonCopy<false, PixelFormat::E5B9G9R9F>,
- MortonCopy<false, PixelFormat::Z32F>,
- MortonCopy<false, PixelFormat::Z16>,
- MortonCopy<false, PixelFormat::Z24S8>,
- MortonCopy<false, PixelFormat::S8Z24>,
- MortonCopy<false, PixelFormat::Z32FS8>,
+ MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>,
+ MortonCopy<false, PixelFormat::D32_FLOAT>,
+ MortonCopy<false, PixelFormat::D16_UNORM>,
+ MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>,
+ MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>,
+ MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>,
};
static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
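Both tables pair one MortonCopy instantiation with each PixelFormat enumerator, so dispatch is a plain array index. Below is a minimal sketch of that lookup, assuming the enumerators are contiguous from zero and the arrays follow enum order; GetSwizzleFunctionSketch and its body are illustrative, since the real function body is elided in this hunk:

static MortonCopyFn GetSwizzleFunctionSketch(MortonSwizzleMode mode, Surface::PixelFormat format) {
    const std::size_t index = static_cast<std::size_t>(format);
    ASSERT(index < morton_to_linear_fns.size());
    // nullptr entries mark formats (the ASTC family) with no linear-to-morton swizzle support.
    return mode == MortonSwizzleMode::MortonToLinear ? morton_to_linear_fns[index]
                                                     : linear_to_morton_fns[index];
}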
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 2f75f8801..fc54ca0ef 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -91,14 +91,15 @@ private:
std::shared_ptr<HostCounter> last;
};
-template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter,
- class QueryPool>
+template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
class QueryCacheBase {
public:
- explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
- : system{system}, rasterizer{rasterizer}, streams{{CounterStream{
- static_cast<QueryCache&>(*this),
- VideoCore::QueryType::SamplesPassed}}} {}
+ explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::MemoryManager& gpu_memory_)
+ : rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+ gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
+ VideoCore::QueryType::SamplesPassed}}} {}
void InvalidateRegion(VAddr addr, std::size_t size) {
std::unique_lock lock{mutex};
@@ -118,29 +119,27 @@ public:
*/
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
std::unique_lock lock{mutex};
- auto& memory_manager = system.GPU().MemoryManager();
- const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr_opt);
- VAddr cpu_addr = *cpu_addr_opt;
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ ASSERT(cpu_addr);
- CachedQuery* query = TryGet(cpu_addr);
+ CachedQuery* query = TryGet(*cpu_addr);
if (!query) {
- ASSERT_OR_EXECUTE(cpu_addr_opt, return;);
- const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+ ASSERT_OR_EXECUTE(cpu_addr, return;);
+ u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
- query = Register(type, cpu_addr, host_ptr, timestamp.has_value());
+ query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
}
query->BindCounter(Stream(type).Current(), timestamp);
- if (Settings::values.use_asynchronous_gpu_emulation) {
- AsyncFlushQuery(cpu_addr);
+ if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
+ AsyncFlushQuery(*cpu_addr);
}
}
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
void UpdateCounters() {
std::unique_lock lock{mutex};
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
}
@@ -206,9 +205,6 @@ public:
committed_flushes.pop_front();
}
-protected:
- std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
-
private:
/// Flushes a memory range to guest memory and removes it from the cache.
void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
@@ -220,8 +216,8 @@ private:
return cache_begin < addr_end && addr_begin < cache_end;
};
- const u64 page_end = addr_end >> PAGE_SHIFT;
- for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
+ const u64 page_end = addr_end >> PAGE_BITS;
+ for (u64 page = addr_begin >> PAGE_BITS; page <= page_end; ++page) {
const auto& it = cached_queries.find(page);
if (it == std::end(cached_queries)) {
continue;
@@ -242,14 +238,14 @@ private:
/// Registers the passed parameters as cached and returns a pointer to the stored cached query.
CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
- const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;
+ const u64 page = static_cast<u64>(cpu_addr) >> PAGE_BITS;
return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
host_ptr);
}
/// Tries to get a cached query. Returns nullptr on failure.
CachedQuery* TryGet(VAddr addr) {
- const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
+ const u64 page = static_cast<u64>(addr) >> PAGE_BITS;
const auto it = cached_queries.find(page);
if (it == std::end(cached_queries)) {
return nullptr;
@@ -268,10 +264,11 @@ private:
}
static constexpr std::uintptr_t PAGE_SIZE = 4096;
- static constexpr unsigned PAGE_SHIFT = 12;
+ static constexpr unsigned PAGE_BITS = 12;
- Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::MemoryManager& gpu_memory;
std::recursive_mutex mutex;
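The cache buckets queries by 4 KiB page, so flushing and lookup only walk the buckets whose page index overlaps the requested range, as in the FlushAndRemoveRegion loop above. A self-contained sketch of the bucketing math; PageRange is a hypothetical helper, not present in the source:

#include <cstddef>
#include <cstdint>
#include <utility>

constexpr unsigned PAGE_BITS = 12; // 4096-byte pages, matching the constant above

// Inclusive range of page keys covering [addr, addr + size); these are the
// buckets of cached_queries that a flush or lookup over that range consults.
std::pair<std::uint64_t, std::uint64_t> PageRange(std::uint64_t addr, std::size_t size) {
    return {addr >> PAGE_BITS, (addr + size - 1) >> PAGE_BITS};
}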
diff --git a/src/video_core/rasterizer_cache.cpp b/src/video_core/rasterizer_cache.cpp
deleted file mode 100644
index 093b2cdf4..000000000
--- a/src/video_core/rasterizer_cache.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "video_core/rasterizer_cache.h"
-
-RasterizerCacheObject::~RasterizerCacheObject() = default;
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
deleted file mode 100644
index 096ee337c..000000000
--- a/src/video_core/rasterizer_cache.h
+++ /dev/null
@@ -1,253 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <mutex>
-#include <set>
-#include <unordered_map>
-
-#include <boost/icl/interval_map.hpp>
-#include <boost/range/iterator_range_core.hpp>
-
-#include "common/common_types.h"
-#include "core/settings.h"
-#include "video_core/gpu.h"
-#include "video_core/rasterizer_interface.h"
-
-class RasterizerCacheObject {
-public:
- explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
-
- virtual ~RasterizerCacheObject();
-
- VAddr GetCpuAddr() const {
- return cpu_addr;
- }
-
- /// Gets the size of the shader in guest memory, required for cache management
- virtual std::size_t GetSizeInBytes() const = 0;
-
- /// Sets whether the cached object should be considered registered
- void SetIsRegistered(bool registered) {
- is_registered = registered;
- }
-
- /// Returns true if the cached object is registered
- bool IsRegistered() const {
- return is_registered;
- }
-
- /// Returns true if the cached object is dirty
- bool IsDirty() const {
- return is_dirty;
- }
-
- /// Returns ticks from when this cached object was last modified
- u64 GetLastModifiedTicks() const {
- return last_modified_ticks;
- }
-
- /// Marks an object as recently modified, used to specify whether it is clean or dirty
- template <class T>
- void MarkAsModified(bool dirty, T& cache) {
- is_dirty = dirty;
- last_modified_ticks = cache.GetModifiedTicks();
- }
-
- void SetMemoryMarked(bool is_memory_marked_) {
- is_memory_marked = is_memory_marked_;
- }
-
- bool IsMemoryMarked() const {
- return is_memory_marked;
- }
-
- void SetSyncPending(bool is_sync_pending_) {
- is_sync_pending = is_sync_pending_;
- }
-
- bool IsSyncPending() const {
- return is_sync_pending;
- }
-
-private:
- bool is_registered{}; ///< Whether the object is currently registered with the cache
- bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
- bool is_memory_marked{}; ///< Whether the object is marking rasterizer memory.
- bool is_sync_pending{}; ///< Whether the object is pending deletion.
- u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
- VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
-};
-
-template <class T>
-class RasterizerCache : NonCopyable {
- friend class RasterizerCacheObject;
-
-public:
- explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
-
- /// Write any cached resources overlapping the specified region back to memory
- void FlushRegion(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
-
- const auto& objects{GetSortedObjectsFromRegion(addr, size)};
- for (auto& object : objects) {
- FlushObject(object);
- }
- }
-
- /// Mark the specified region as being invalidated
- void InvalidateRegion(VAddr addr, u64 size) {
- std::lock_guard lock{mutex};
-
- const auto& objects{GetSortedObjectsFromRegion(addr, size)};
- for (auto& object : objects) {
- if (!object->IsRegistered()) {
- // Skip duplicates
- continue;
- }
- Unregister(object);
- }
- }
-
- void OnCPUWrite(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
-
- for (const auto& object : GetSortedObjectsFromRegion(addr, size)) {
- if (object->IsRegistered()) {
- UnmarkMemory(object);
- object->SetSyncPending(true);
- marked_for_unregister.emplace_back(object);
- }
- }
- }
-
- void SyncGuestHost() {
- std::lock_guard lock{mutex};
-
- for (const auto& object : marked_for_unregister) {
- if (object->IsRegistered()) {
- object->SetSyncPending(false);
- Unregister(object);
- }
- }
- marked_for_unregister.clear();
- }
-
- /// Invalidates everything in the cache
- void InvalidateAll() {
- std::lock_guard lock{mutex};
-
- while (interval_cache.begin() != interval_cache.end()) {
- Unregister(*interval_cache.begin()->second.begin());
- }
- }
-
-protected:
- /// Tries to get an object from the cache with the specified cache address
- T TryGet(VAddr addr) const {
- const auto iter = map_cache.find(addr);
- if (iter != map_cache.end())
- return iter->second;
- return nullptr;
- }
-
- /// Register an object into the cache
- virtual void Register(const T& object) {
- std::lock_guard lock{mutex};
-
- object->SetIsRegistered(true);
- interval_cache.add({GetInterval(object), ObjectSet{object}});
- map_cache.insert({object->GetCpuAddr(), object});
- rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
- object->SetMemoryMarked(true);
- }
-
- /// Unregisters an object from the cache
- virtual void Unregister(const T& object) {
- std::lock_guard lock{mutex};
-
- UnmarkMemory(object);
- object->SetIsRegistered(false);
- if (object->IsSyncPending()) {
- marked_for_unregister.remove(object);
- object->SetSyncPending(false);
- }
- const VAddr addr = object->GetCpuAddr();
- interval_cache.subtract({GetInterval(object), ObjectSet{object}});
- map_cache.erase(addr);
- }
-
- void UnmarkMemory(const T& object) {
- if (!object->IsMemoryMarked()) {
- return;
- }
- rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
- object->SetMemoryMarked(false);
- }
-
- /// Returns a ticks counter used for tracking when cached objects were last modified
- u64 GetModifiedTicks() {
- std::lock_guard lock{mutex};
-
- return ++modified_ticks;
- }
-
- virtual void FlushObjectInner(const T& object) = 0;
-
- /// Flushes the specified object, updating appropriate cache state as needed
- void FlushObject(const T& object) {
- std::lock_guard lock{mutex};
-
- if (!object->IsDirty()) {
- return;
- }
- FlushObjectInner(object);
- object->MarkAsModified(false, *this);
- }
-
- std::recursive_mutex mutex;
-
-private:
- /// Returns a list of cached objects from the specified memory region, ordered by access time
- std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
- if (size == 0) {
- return {};
- }
-
- std::vector<T> objects;
- const ObjectInterval interval{addr, addr + size};
- for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) {
- for (auto& cached_object : pair.second) {
- if (!cached_object) {
- continue;
- }
- objects.push_back(cached_object);
- }
- }
-
- std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool {
- return a->GetLastModifiedTicks() < b->GetLastModifiedTicks();
- });
-
- return objects;
- }
-
- using ObjectSet = std::set<T>;
- using ObjectCache = std::unordered_map<VAddr, T>;
- using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
- using ObjectInterval = typename IntervalCache::interval_type;
-
- static auto GetInterval(const T& object) {
- return ObjectInterval::right_open(object->GetCpuAddr(),
- object->GetCpuAddr() + object->GetSizeInBytes());
- }
-
- ObjectCache map_cache;
- IntervalCache interval_cache; ///< Cache of objects
- u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
- VideoCore::RasterizerInterface& rasterizer;
- std::list<T> marked_for_unregister;
-};
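For reference, the removed cache leaned on boost::icl::interval_map's aggregate-on-overlap semantics: adding overlapping intervals splits them into sub-intervals whose mapped ObjectSet is the union of every object covering that sub-range. A toy, self-contained illustration of that behavior, mirroring the interval_cache.add() call above:

#include <set>
#include <boost/icl/interval_map.hpp>

int main() {
    using IntervalCache = boost::icl::interval_map<int, std::set<int>>;
    IntervalCache cache;
    cache.add({IntervalCache::interval_type::right_open(0, 10), std::set<int>{1}});
    cache.add({IntervalCache::interval_type::right_open(5, 15), std::set<int>{2}});
    // cache now holds: [0,5)->{1}, [5,10)->{1,2}, [10,15)->{2}
}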
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 3cbdac8e7..b3e0919f8 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -106,11 +106,8 @@ public:
virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
/// Initialize disk cached resources for the game being emulated
- virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
- const DiskResourceLoadCallback& callback = {}) {}
-
- /// Initializes renderer dirty flags
- virtual void SetupDirtyFlags() {}
+ virtual void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
+ const DiskResourceLoadCallback& callback) {}
/// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
GuestDriverProfile& AccessGuestDriverProfile() {
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 919d1f2d4..a93a1732c 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -9,7 +9,9 @@
namespace VideoCore {
-RendererBase::RendererBase(Core::Frontend::EmuWindow& window) : render_window{window} {
+RendererBase::RendererBase(Core::Frontend::EmuWindow& window_,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context_)
+ : render_window{window_}, context{std::move(context_)} {
RefreshBaseSettings();
}
@@ -18,7 +20,7 @@ RendererBase::~RendererBase() = default;
void RendererBase::RefreshBaseSettings() {
UpdateCurrentFramebufferLayout();
- renderer_settings.use_framelimiter = Settings::values.use_frame_limit;
+ renderer_settings.use_framelimiter = Settings::values.use_frame_limit.GetValue();
renderer_settings.set_background_color = true;
}
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 1d85219b6..5c650808b 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -15,7 +15,8 @@
namespace Core::Frontend {
class EmuWindow;
-}
+class GraphicsContext;
+} // namespace Core::Frontend
namespace VideoCore {
@@ -25,14 +26,15 @@ struct RendererSettings {
// Screenshot
std::atomic<bool> screenshot_requested{false};
- void* screenshot_bits;
+ void* screenshot_bits{};
std::function<void()> screenshot_complete_callback;
Layout::FramebufferLayout screenshot_framebuffer_layout;
};
class RendererBase : NonCopyable {
public:
- explicit RendererBase(Core::Frontend::EmuWindow& window);
+ explicit RendererBase(Core::Frontend::EmuWindow& window,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context);
virtual ~RendererBase();
/// Initialize the renderer
@@ -44,11 +46,6 @@ public:
/// Finalize rendering the guest frame and draw into the presentation texture
virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
- /// Draws the latest frame to the window waiting timeout_ms for a frame to arrive (Renderer
- /// specific implementation)
- /// Returns true if a frame was drawn
- virtual bool TryPresent(int timeout_ms) = 0;
-
// Getter/setter functions:
// ------------------------
@@ -68,6 +65,14 @@ public:
return *rasterizer;
}
+ Core::Frontend::GraphicsContext& Context() {
+ return *context;
+ }
+
+ const Core::Frontend::GraphicsContext& Context() const {
+ return *context;
+ }
+
Core::Frontend::EmuWindow& GetRenderWindow() {
return render_window;
}
@@ -94,6 +99,7 @@ public:
protected:
Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
std::unique_ptr<RasterizerInterface> rasterizer;
+ std::unique_ptr<Core::Frontend::GraphicsContext> context;
f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
int m_current_frame = 0; ///< Current frame, should be set by the renderer
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
new file mode 100644
index 000000000..b7e9ed2e9
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -0,0 +1,2102 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <variant>
+
+#include <fmt/format.h>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_arb_decompiler.h"
+#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/shader/registry.h"
+#include "video_core/shader/shader_ir.h"
+
+// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
+// GLASM lacks booleans, so predicates have to be implemented as integers.
+// Using -1 for true is convenient: both CMP.S and NOT.U can negate it, and because CMP.S selects
+// on the sign of its first operand, a predicate can directly pick between two values.
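+//
+// For example (an illustrative sketch, not literal decompiler output), selecting between two
+// registers with predicate P0 under this convention:
+//   CMP.S R0.x, P0.x, R1.x, R2.x;  # R0 = (P0 < 0) ? R1 : R2, so -1 picks R1 and 0 picks R2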
+
+namespace OpenGL {
+
+namespace {
+
+using Tegra::Engines::ShaderType;
+using Tegra::Shader::Attribute;
+using Tegra::Shader::PixelImap;
+using Tegra::Shader::Register;
+using namespace VideoCommon::Shader;
+using Operation = const OperationNode&;
+
+constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
+
+char Swizzle(std::size_t component) {
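+ // component["xyzw"] is the commuted spelling of "xyzw"[component]; the built-in subscript accepts either order.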
+ ASSERT(component < 4);
+ return component["xyzw"];
+}
+
+constexpr bool IsGenericAttribute(Attribute::Index index) {
+ return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
+}
+
+u32 GetGenericAttributeIndex(Attribute::Index index) {
+ ASSERT(IsGenericAttribute(index));
+ return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
+}
+
+std::string_view Modifiers(Operation operation) {
+ const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
+ if (meta && meta->precise) {
+ return ".PREC";
+ }
+ return "";
+}
+
+std::string_view GetInputFlags(PixelImap attribute) {
+ switch (attribute) {
+ case PixelImap::Perspective:
+ return "";
+ case PixelImap::Constant:
+ return "FLAT ";
+ case PixelImap::ScreenLinear:
+ return "NOPERSPECTIVE ";
+ case PixelImap::Unused:
+ break;
+ }
+ UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
+ return {};
+}
+
+std::string_view ImageType(Tegra::Shader::ImageType image_type) {
+ switch (image_type) {
+ case Tegra::Shader::ImageType::Texture1D:
+ return "1D";
+ case Tegra::Shader::ImageType::TextureBuffer:
+ return "BUFFER";
+ case Tegra::Shader::ImageType::Texture1DArray:
+ return "ARRAY1D";
+ case Tegra::Shader::ImageType::Texture2D:
+ return "2D";
+ case Tegra::Shader::ImageType::Texture2DArray:
+ return "ARRAY2D";
+ case Tegra::Shader::ImageType::Texture3D:
+ return "3D";
+ }
+ UNREACHABLE();
+ return {};
+}
+
+std::string_view StackName(MetaStackClass stack) {
+ switch (stack) {
+ case MetaStackClass::Ssy:
+ return "SSY";
+ case MetaStackClass::Pbk:
+ return "PBK";
+ }
+ UNREACHABLE();
+ return "";
+}
+
+std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
+ switch (topology) {
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
+ return "POINTS";
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
+ return "LINES";
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
+ return "LINES_ADJACENCY";
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
+ return "TRIANGLES";
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
+ return "TRIANGLES_ADJACENCY";
+ default:
+ UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
+ return "POINTS";
+ }
+}
+
+std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
+ switch (topology) {
+ case Tegra::Shader::OutputTopology::PointList:
+ return "POINTS";
+ case Tegra::Shader::OutputTopology::LineStrip:
+ return "LINE_STRIP";
+ case Tegra::Shader::OutputTopology::TriangleStrip:
+ return "TRIANGLE_STRIP";
+ default:
+ UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
+ return "points";
+ }
+}
+
+std::string_view StageInputName(ShaderType stage) {
+ switch (stage) {
+ case ShaderType::Vertex:
+ case ShaderType::Geometry:
+ return "vertex";
+ case ShaderType::Fragment:
+ return "fragment";
+ case ShaderType::Compute:
+ return "invocation";
+ default:
+ UNREACHABLE();
+ return "";
+ }
+}
+
+std::string TextureType(const MetaTexture& meta) {
+ if (meta.sampler.is_buffer) {
+ return "BUFFER";
+ }
+ std::string type;
+ if (meta.sampler.is_shadow) {
+ type += "SHADOW";
+ }
+ if (meta.sampler.is_array) {
+ type += "ARRAY";
+ }
+ type += [&meta] {
+ switch (meta.sampler.type) {
+ case Tegra::Shader::TextureType::Texture1D:
+ return "1D";
+ case Tegra::Shader::TextureType::Texture2D:
+ return "2D";
+ case Tegra::Shader::TextureType::Texture3D:
+ return "3D";
+ case Tegra::Shader::TextureType::TextureCube:
+ return "CUBE";
+ }
+ UNREACHABLE();
+ return "2D";
+ }();
+ return type;
+}
+
+class ARBDecompiler final {
+public:
+ explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
+ ShaderType stage, std::string_view identifier);
+
+ std::string Code() const {
+ return shader_source;
+ }
+
+private:
+ void DefineGlobalMemory();
+
+ void DeclareHeader();
+ void DeclareVertex();
+ void DeclareGeometry();
+ void DeclareFragment();
+ void DeclareCompute();
+ void DeclareInputAttributes();
+ void DeclareOutputAttributes();
+ void DeclareLocalMemory();
+ void DeclareGlobalMemory();
+ void DeclareConstantBuffers();
+ void DeclareRegisters();
+ void DeclareTemporaries();
+ void DeclarePredicates();
+ void DeclareInternalFlags();
+
+ void InitializeVariables();
+
+ void DecompileAST();
+ void DecompileBranchMode();
+
+ void VisitAST(const ASTNode& node);
+ std::string VisitExpression(const Expr& node);
+
+ void VisitBlock(const NodeBlock& bb);
+
+ std::string Visit(const Node& node);
+
+ std::pair<std::string, std::size_t> BuildCoords(Operation);
+ std::string BuildAoffi(Operation);
+ std::string GlobalMemoryPointer(const GmemNode& gmem);
+ void Exit();
+
+ std::string Assign(Operation);
+ std::string Select(Operation);
+ std::string FClamp(Operation);
+ std::string FCastHalf0(Operation);
+ std::string FCastHalf1(Operation);
+ std::string FSqrt(Operation);
+ std::string FSwizzleAdd(Operation);
+ std::string HAdd2(Operation);
+ std::string HMul2(Operation);
+ std::string HFma2(Operation);
+ std::string HAbsolute(Operation);
+ std::string HNegate(Operation);
+ std::string HClamp(Operation);
+ std::string HCastFloat(Operation);
+ std::string HUnpack(Operation);
+ std::string HMergeF32(Operation);
+ std::string HMergeH0(Operation);
+ std::string HMergeH1(Operation);
+ std::string HPack2(Operation);
+ std::string LogicalAssign(Operation);
+ std::string LogicalPick2(Operation);
+ std::string LogicalAnd2(Operation);
+ std::string FloatOrdered(Operation);
+ std::string FloatUnordered(Operation);
+ std::string LogicalAddCarry(Operation);
+ std::string Texture(Operation);
+ std::string TextureGather(Operation);
+ std::string TextureQueryDimensions(Operation);
+ std::string TextureQueryLod(Operation);
+ std::string TexelFetch(Operation);
+ std::string TextureGradient(Operation);
+ std::string ImageLoad(Operation);
+ std::string ImageStore(Operation);
+ std::string Branch(Operation);
+ std::string BranchIndirect(Operation);
+ std::string PushFlowStack(Operation);
+ std::string PopFlowStack(Operation);
+ std::string Exit(Operation);
+ std::string Discard(Operation);
+ std::string EmitVertex(Operation);
+ std::string EndPrimitive(Operation);
+ std::string InvocationId(Operation);
+ std::string YNegate(Operation);
+ std::string ThreadId(Operation);
+ std::string ShuffleIndexed(Operation);
+ std::string Barrier(Operation);
+ std::string MemoryBarrierGroup(Operation);
+ std::string MemoryBarrierGlobal(Operation);
+
+ template <const std::string_view& op>
+ std::string Unary(Operation operation) {
+ std::string temporary = AllocTemporary();
+ AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
+ return temporary;
+ }
+
+ template <const std::string_view& op>
+ std::string Binary(Operation operation) {
+ std::string temporary = AllocTemporary();
+ AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
+ Visit(operation[1]));
+ return temporary;
+ }
+
+ template <const std::string_view& op>
+ std::string Trinary(Operation operation) {
+ std::string temporary = AllocTemporary();
+ AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
+ Visit(operation[1]), Visit(operation[2]));
+ return temporary;
+ }
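+
+ // For instance (a sketch, assuming T0 is the next free temporary), Binary<ADD_F32> over
+ // operands R0.x and R1.x appends "ADD.F32 T0.x, R0.x, R1.x;" and returns "T0.x" for the
+ // parent expression to consume.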
+
+ template <const std::string_view& op, bool unordered>
+ std::string FloatComparison(Operation operation) {
+ std::string temporary = AllocTemporary();
+ AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
+ AddLine("MOV.S {}, 0;", temporary);
+ AddLine("MOV.S {} (NE.x), -1;", temporary);
+
+ const std::string op_a = Visit(operation[0]);
+ const std::string op_b = Visit(operation[1]);
+ if constexpr (unordered) {
+ AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
+ AddLine("TRUNC.U.CC RC.x, RC.x;");
+ AddLine("MOV.S {} (NE.x), -1;", temporary);
+ AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
+ AddLine("TRUNC.U.CC RC.x, RC.x;");
+ AddLine("MOV.S {} (NE.x), -1;", temporary);
+ } else if (op == SNE_F) {
+ AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
+ AddLine("TRUNC.U.CC RC.x, RC.x;");
+ AddLine("MOV.S {} (NE.x), 0;", temporary);
+ AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
+ AddLine("TRUNC.U.CC RC.x, RC.x;");
+ AddLine("MOV.S {} (NE.x), 0;", temporary);
+ }
+ return temporary;
+ }
+
+ template <const std::string_view& op, bool is_nan>
+ std::string HalfComparison(Operation operation) {
+ std::string tmp1 = AllocVectorTemporary();
+ const std::string tmp2 = AllocVectorTemporary();
+ const std::string op_a = Visit(operation[0]);
+ const std::string op_b = Visit(operation[1]);
+ AddLine("UP2H.F {}, {};", tmp1, op_a);
+ AddLine("UP2H.F {}, {};", tmp2, op_b);
+ AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
+ AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
+ AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
+ AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
+ AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
+ if constexpr (is_nan) {
+ AddLine("MOVC.F RC.x, {};", op_a);
+ AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
+ AddLine("MOVC.F RC.x, {};", op_b);
+ AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
+ }
+ return tmp1;
+ }
+
+ template <const std::string_view& op, const std::string_view& type>
+ std::string AtomicImage(Operation operation) {
+ const auto& meta = std::get<MetaImage>(operation.GetMeta());
+ const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
+ const std::size_t num_coords = operation.GetOperandsCount();
+ const std::size_t num_values = meta.values.size();
+
+ const std::string coord = AllocVectorTemporary();
+ const std::string value = AllocVectorTemporary();
+ for (std::size_t i = 0; i < num_coords; ++i) {
+ AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
+ }
+ for (std::size_t i = 0; i < num_values; ++i) {
+ AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
+ }
+
+ AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord,
+ image_id, ImageType(meta.image.type));
+ return fmt::format("{}.x", coord);
+ }
+
+ template <const std::string_view& op, const std::string_view& type>
+ std::string Atomic(Operation operation) {
+ std::string temporary = AllocTemporary();
+ std::string address;
+ std::string_view opname;
+ if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
+ address = GlobalMemoryPointer(*gmem);
+ opname = "ATOM";
+ } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
+ address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
+ opname = "ATOMS";
+ } else {
+ UNREACHABLE();
+ return "{0, 0, 0, 0}";
+ }
+ AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
+ return temporary;
+ }
+
+ template <char type>
+ std::string Negate(Operation operation) {
+ std::string temporary = AllocTemporary();
+ if constexpr (type == 'F') {
+ AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
+ } else {
+ AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
+ }
+ return temporary;
+ }
+
+ template <char type>
+ std::string Absolute(Operation operation) {
+ std::string temporary = AllocTemporary();
+ AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
+ return temporary;
+ }
+
+ template <char type>
+ std::string BitfieldInsert(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
+ AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
+ AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
+ Visit(operation[0]));
+ return fmt::format("{}.x", temporary);
+ }
+
+ template <char type>
+ std::string BitfieldExtract(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
+ AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
+ AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
+ return fmt::format("{}.x", temporary);
+ }
+
+ template <char swizzle>
+ std::string LocalInvocationId(Operation) {
+ return fmt::format("invocation.localid.{}", swizzle);
+ }
+
+ template <char swizzle>
+ std::string WorkGroupId(Operation) {
+ return fmt::format("invocation.groupid.{}", swizzle);
+ }
+
+ template <char c1, char c2>
+ std::string ThreadMask(Operation) {
+ return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
+ }
+
+ template <typename... Args>
+ void AddExpression(std::string_view text, Args&&... args) {
+ shader_source += fmt::format(text, std::forward<Args>(args)...);
+ }
+
+ template <typename... Args>
+ void AddLine(std::string_view text, Args&&... args) {
+ AddExpression(text, std::forward<Args>(args)...);
+ shader_source += '\n';
+ }
+
+ std::string AllocLongVectorTemporary() {
+ max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1);
+ return fmt::format("L{}", num_long_temporaries++);
+ }
+
+ std::string AllocLongTemporary() {
+ return fmt::format("{}.x", AllocLongVectorTemporary());
+ }
+
+ std::string AllocVectorTemporary() {
+ max_temporaries = std::max(max_temporaries, num_temporaries + 1);
+ return fmt::format("T{}", num_temporaries++);
+ }
+
+ std::string AllocTemporary() {
+ return fmt::format("{}.x", AllocVectorTemporary());
+ }
+
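+ // Temporaries are recycled between statements; max_temporaries and max_long_temporaries
+ // track the high-water marks that DeclareTemporaries later turns into TEMP declarations.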
+ void ResetTemporaries() noexcept {
+ num_temporaries = 0;
+ num_long_temporaries = 0;
+ }
+
+ const Device& device;
+ const ShaderIR& ir;
+ const Registry& registry;
+ const ShaderType stage;
+
+ std::size_t num_temporaries = 0;
+ std::size_t max_temporaries = 0;
+
+ std::size_t num_long_temporaries = 0;
+ std::size_t max_long_temporaries = 0;
+
+ std::map<GlobalMemoryBase, u32> global_memory_names;
+
+ std::string shader_source;
+
+ static constexpr std::string_view ADD_F32 = "ADD.F32";
+ static constexpr std::string_view ADD_S = "ADD.S";
+ static constexpr std::string_view ADD_U = "ADD.U";
+ static constexpr std::string_view MUL_F32 = "MUL.F32";
+ static constexpr std::string_view MUL_S = "MUL.S";
+ static constexpr std::string_view MUL_U = "MUL.U";
+ static constexpr std::string_view DIV_F32 = "DIV.F32";
+ static constexpr std::string_view DIV_S = "DIV.S";
+ static constexpr std::string_view DIV_U = "DIV.U";
+ static constexpr std::string_view MAD_F32 = "MAD.F32";
+ static constexpr std::string_view RSQ_F32 = "RSQ.F32";
+ static constexpr std::string_view COS_F32 = "COS.F32";
+ static constexpr std::string_view SIN_F32 = "SIN.F32";
+ static constexpr std::string_view EX2_F32 = "EX2.F32";
+ static constexpr std::string_view LG2_F32 = "LG2.F32";
+ static constexpr std::string_view SLT_F = "SLT.F32";
+ static constexpr std::string_view SLT_S = "SLT.S";
+ static constexpr std::string_view SLT_U = "SLT.U";
+ static constexpr std::string_view SEQ_F = "SEQ.F32";
+ static constexpr std::string_view SEQ_S = "SEQ.S";
+ static constexpr std::string_view SEQ_U = "SEQ.U";
+ static constexpr std::string_view SLE_F = "SLE.F32";
+ static constexpr std::string_view SLE_S = "SLE.S";
+ static constexpr std::string_view SLE_U = "SLE.U";
+ static constexpr std::string_view SGT_F = "SGT.F32";
+ static constexpr std::string_view SGT_S = "SGT.S";
+ static constexpr std::string_view SGT_U = "SGT.U";
+ static constexpr std::string_view SNE_F = "SNE.F32";
+ static constexpr std::string_view SNE_S = "SNE.S";
+ static constexpr std::string_view SNE_U = "SNE.U";
+ static constexpr std::string_view SGE_F = "SGE.F32";
+ static constexpr std::string_view SGE_S = "SGE.S";
+ static constexpr std::string_view SGE_U = "SGE.U";
+ static constexpr std::string_view AND_S = "AND.S";
+ static constexpr std::string_view AND_U = "AND.U";
+ static constexpr std::string_view TRUNC_F = "TRUNC.F";
+ static constexpr std::string_view TRUNC_S = "TRUNC.S";
+ static constexpr std::string_view TRUNC_U = "TRUNC.U";
+ static constexpr std::string_view SHL_S = "SHL.S";
+ static constexpr std::string_view SHL_U = "SHL.U";
+ static constexpr std::string_view SHR_S = "SHR.S";
+ static constexpr std::string_view SHR_U = "SHR.U";
+ static constexpr std::string_view OR_S = "OR.S";
+ static constexpr std::string_view OR_U = "OR.U";
+ static constexpr std::string_view XOR_S = "XOR.S";
+ static constexpr std::string_view XOR_U = "XOR.U";
+ static constexpr std::string_view NOT_S = "NOT.S";
+ static constexpr std::string_view NOT_U = "NOT.U";
+ static constexpr std::string_view BTC_S = "BTC.S";
+ static constexpr std::string_view BTC_U = "BTC.U";
+ static constexpr std::string_view BTFM_S = "BTFM.S";
+ static constexpr std::string_view BTFM_U = "BTFM.U";
+ static constexpr std::string_view ROUND_F = "ROUND.F";
+ static constexpr std::string_view CEIL_F = "CEIL.F";
+ static constexpr std::string_view FLR_F = "FLR.F";
+ static constexpr std::string_view I2F_S = "I2F.S";
+ static constexpr std::string_view I2F_U = "I2F.U";
+ static constexpr std::string_view MIN_F = "MIN.F";
+ static constexpr std::string_view MIN_S = "MIN.S";
+ static constexpr std::string_view MIN_U = "MIN.U";
+ static constexpr std::string_view MAX_F = "MAX.F";
+ static constexpr std::string_view MAX_S = "MAX.S";
+ static constexpr std::string_view MAX_U = "MAX.U";
+ static constexpr std::string_view MOV_U = "MOV.U";
+ static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
+ static constexpr std::string_view TGALL_U = "TGALL.U";
+ static constexpr std::string_view TGANY_U = "TGANY.U";
+ static constexpr std::string_view TGEQ_U = "TGEQ.U";
+ static constexpr std::string_view EXCH = "EXCH";
+ static constexpr std::string_view ADD = "ADD";
+ static constexpr std::string_view MIN = "MIN";
+ static constexpr std::string_view MAX = "MAX";
+ static constexpr std::string_view AND = "AND";
+ static constexpr std::string_view OR = "OR";
+ static constexpr std::string_view XOR = "XOR";
+ static constexpr std::string_view U32 = "U32";
+ static constexpr std::string_view S32 = "S32";
+
+ static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
+ using DecompilerType = std::string (ARBDecompiler::*)(Operation);
+ static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
+ &ARBDecompiler::Assign,
+
+ &ARBDecompiler::Select,
+
+ &ARBDecompiler::Binary<ADD_F32>,
+ &ARBDecompiler::Binary<MUL_F32>,
+ &ARBDecompiler::Binary<DIV_F32>,
+ &ARBDecompiler::Trinary<MAD_F32>,
+ &ARBDecompiler::Negate<'F'>,
+ &ARBDecompiler::Absolute<'F'>,
+ &ARBDecompiler::FClamp,
+ &ARBDecompiler::FCastHalf0,
+ &ARBDecompiler::FCastHalf1,
+ &ARBDecompiler::Binary<MIN_F>,
+ &ARBDecompiler::Binary<MAX_F>,
+ &ARBDecompiler::Unary<COS_F32>,
+ &ARBDecompiler::Unary<SIN_F32>,
+ &ARBDecompiler::Unary<EX2_F32>,
+ &ARBDecompiler::Unary<LG2_F32>,
+ &ARBDecompiler::Unary<RSQ_F32>,
+ &ARBDecompiler::FSqrt,
+ &ARBDecompiler::Unary<ROUND_F>,
+ &ARBDecompiler::Unary<FLR_F>,
+ &ARBDecompiler::Unary<CEIL_F>,
+ &ARBDecompiler::Unary<TRUNC_F>,
+ &ARBDecompiler::Unary<I2F_S>,
+ &ARBDecompiler::Unary<I2F_U>,
+ &ARBDecompiler::FSwizzleAdd,
+
+ &ARBDecompiler::Binary<ADD_S>,
+ &ARBDecompiler::Binary<MUL_S>,
+ &ARBDecompiler::Binary<DIV_S>,
+ &ARBDecompiler::Negate<'S'>,
+ &ARBDecompiler::Absolute<'S'>,
+ &ARBDecompiler::Binary<MIN_S>,
+ &ARBDecompiler::Binary<MAX_S>,
+
+ &ARBDecompiler::Unary<TRUNC_S>,
+ &ARBDecompiler::Unary<MOV_U>,
+ &ARBDecompiler::Binary<SHL_S>,
+ &ARBDecompiler::Binary<SHR_U>,
+ &ARBDecompiler::Binary<SHR_S>,
+ &ARBDecompiler::Binary<AND_S>,
+ &ARBDecompiler::Binary<OR_S>,
+ &ARBDecompiler::Binary<XOR_S>,
+ &ARBDecompiler::Unary<NOT_S>,
+ &ARBDecompiler::BitfieldInsert<'S'>,
+ &ARBDecompiler::BitfieldExtract<'S'>,
+ &ARBDecompiler::Unary<BTC_S>,
+ &ARBDecompiler::Unary<BTFM_S>,
+
+ &ARBDecompiler::Binary<ADD_U>,
+ &ARBDecompiler::Binary<MUL_U>,
+ &ARBDecompiler::Binary<DIV_U>,
+ &ARBDecompiler::Binary<MIN_U>,
+ &ARBDecompiler::Binary<MAX_U>,
+ &ARBDecompiler::Unary<TRUNC_U>,
+ &ARBDecompiler::Unary<MOV_U>,
+ &ARBDecompiler::Binary<SHL_U>,
+ &ARBDecompiler::Binary<SHR_U>,
+ &ARBDecompiler::Binary<SHR_U>,
+ &ARBDecompiler::Binary<AND_U>,
+ &ARBDecompiler::Binary<OR_U>,
+ &ARBDecompiler::Binary<XOR_U>,
+ &ARBDecompiler::Unary<NOT_U>,
+ &ARBDecompiler::BitfieldInsert<'U'>,
+ &ARBDecompiler::BitfieldExtract<'U'>,
+ &ARBDecompiler::Unary<BTC_U>,
+ &ARBDecompiler::Unary<BTFM_U>,
+
+ &ARBDecompiler::HAdd2,
+ &ARBDecompiler::HMul2,
+ &ARBDecompiler::HFma2,
+ &ARBDecompiler::HAbsolute,
+ &ARBDecompiler::HNegate,
+ &ARBDecompiler::HClamp,
+ &ARBDecompiler::HCastFloat,
+ &ARBDecompiler::HUnpack,
+ &ARBDecompiler::HMergeF32,
+ &ARBDecompiler::HMergeH0,
+ &ARBDecompiler::HMergeH1,
+ &ARBDecompiler::HPack2,
+
+ &ARBDecompiler::LogicalAssign,
+ &ARBDecompiler::Binary<AND_U>,
+ &ARBDecompiler::Binary<OR_U>,
+ &ARBDecompiler::Binary<XOR_U>,
+ &ARBDecompiler::Unary<NOT_U>,
+ &ARBDecompiler::LogicalPick2,
+ &ARBDecompiler::LogicalAnd2,
+
+ &ARBDecompiler::FloatComparison<SLT_F, false>,
+ &ARBDecompiler::FloatComparison<SEQ_F, false>,
+ &ARBDecompiler::FloatComparison<SLE_F, false>,
+ &ARBDecompiler::FloatComparison<SGT_F, false>,
+ &ARBDecompiler::FloatComparison<SNE_F, false>,
+ &ARBDecompiler::FloatComparison<SGE_F, false>,
+ &ARBDecompiler::FloatOrdered,
+ &ARBDecompiler::FloatUnordered,
+ &ARBDecompiler::FloatComparison<SLT_F, true>,
+ &ARBDecompiler::FloatComparison<SEQ_F, true>,
+ &ARBDecompiler::FloatComparison<SLE_F, true>,
+ &ARBDecompiler::FloatComparison<SGT_F, true>,
+ &ARBDecompiler::FloatComparison<SNE_F, true>,
+ &ARBDecompiler::FloatComparison<SGE_F, true>,
+
+ &ARBDecompiler::Binary<SLT_S>,
+ &ARBDecompiler::Binary<SEQ_S>,
+ &ARBDecompiler::Binary<SLE_S>,
+ &ARBDecompiler::Binary<SGT_S>,
+ &ARBDecompiler::Binary<SNE_S>,
+ &ARBDecompiler::Binary<SGE_S>,
+
+ &ARBDecompiler::Binary<SLT_U>,
+ &ARBDecompiler::Binary<SEQ_U>,
+ &ARBDecompiler::Binary<SLE_U>,
+ &ARBDecompiler::Binary<SGT_U>,
+ &ARBDecompiler::Binary<SNE_U>,
+ &ARBDecompiler::Binary<SGE_U>,
+
+ &ARBDecompiler::LogicalAddCarry,
+
+ &ARBDecompiler::HalfComparison<SLT_F, false>,
+ &ARBDecompiler::HalfComparison<SEQ_F, false>,
+ &ARBDecompiler::HalfComparison<SLE_F, false>,
+ &ARBDecompiler::HalfComparison<SGT_F, false>,
+ &ARBDecompiler::HalfComparison<SNE_F, false>,
+ &ARBDecompiler::HalfComparison<SGE_F, false>,
+ &ARBDecompiler::HalfComparison<SLT_F, true>,
+ &ARBDecompiler::HalfComparison<SEQ_F, true>,
+ &ARBDecompiler::HalfComparison<SLE_F, true>,
+ &ARBDecompiler::HalfComparison<SGT_F, true>,
+ &ARBDecompiler::HalfComparison<SNE_F, true>,
+ &ARBDecompiler::HalfComparison<SGE_F, true>,
+
+ &ARBDecompiler::Texture,
+ &ARBDecompiler::Texture,
+ &ARBDecompiler::TextureGather,
+ &ARBDecompiler::TextureQueryDimensions,
+ &ARBDecompiler::TextureQueryLod,
+ &ARBDecompiler::TexelFetch,
+ &ARBDecompiler::TextureGradient,
+
+ &ARBDecompiler::ImageLoad,
+ &ARBDecompiler::ImageStore,
+
+ &ARBDecompiler::AtomicImage<ADD, U32>,
+ &ARBDecompiler::AtomicImage<AND, U32>,
+ &ARBDecompiler::AtomicImage<OR, U32>,
+ &ARBDecompiler::AtomicImage<XOR, U32>,
+ &ARBDecompiler::AtomicImage<EXCH, U32>,
+
+ &ARBDecompiler::Atomic<EXCH, U32>,
+ &ARBDecompiler::Atomic<ADD, U32>,
+ &ARBDecompiler::Atomic<MIN, U32>,
+ &ARBDecompiler::Atomic<MAX, U32>,
+ &ARBDecompiler::Atomic<AND, U32>,
+ &ARBDecompiler::Atomic<OR, U32>,
+ &ARBDecompiler::Atomic<XOR, U32>,
+
+ &ARBDecompiler::Atomic<EXCH, S32>,
+ &ARBDecompiler::Atomic<ADD, S32>,
+ &ARBDecompiler::Atomic<MIN, S32>,
+ &ARBDecompiler::Atomic<MAX, S32>,
+ &ARBDecompiler::Atomic<AND, S32>,
+ &ARBDecompiler::Atomic<OR, S32>,
+ &ARBDecompiler::Atomic<XOR, S32>,
+
+ &ARBDecompiler::Atomic<ADD, U32>,
+ &ARBDecompiler::Atomic<MIN, U32>,
+ &ARBDecompiler::Atomic<MAX, U32>,
+ &ARBDecompiler::Atomic<AND, U32>,
+ &ARBDecompiler::Atomic<OR, U32>,
+ &ARBDecompiler::Atomic<XOR, U32>,
+
+ &ARBDecompiler::Atomic<ADD, S32>,
+ &ARBDecompiler::Atomic<MIN, S32>,
+ &ARBDecompiler::Atomic<MAX, S32>,
+ &ARBDecompiler::Atomic<AND, S32>,
+ &ARBDecompiler::Atomic<OR, S32>,
+ &ARBDecompiler::Atomic<XOR, S32>,
+
+ &ARBDecompiler::Branch,
+ &ARBDecompiler::BranchIndirect,
+ &ARBDecompiler::PushFlowStack,
+ &ARBDecompiler::PopFlowStack,
+ &ARBDecompiler::Exit,
+ &ARBDecompiler::Discard,
+
+ &ARBDecompiler::EmitVertex,
+ &ARBDecompiler::EndPrimitive,
+
+ &ARBDecompiler::InvocationId,
+ &ARBDecompiler::YNegate,
+ &ARBDecompiler::LocalInvocationId<'x'>,
+ &ARBDecompiler::LocalInvocationId<'y'>,
+ &ARBDecompiler::LocalInvocationId<'z'>,
+ &ARBDecompiler::WorkGroupId<'x'>,
+ &ARBDecompiler::WorkGroupId<'y'>,
+ &ARBDecompiler::WorkGroupId<'z'>,
+
+ &ARBDecompiler::Unary<TGBALLOT_U>,
+ &ARBDecompiler::Unary<TGALL_U>,
+ &ARBDecompiler::Unary<TGANY_U>,
+ &ARBDecompiler::Unary<TGEQ_U>,
+
+ &ARBDecompiler::ThreadId,
+ &ARBDecompiler::ThreadMask<'e', 'q'>,
+ &ARBDecompiler::ThreadMask<'g', 'e'>,
+ &ARBDecompiler::ThreadMask<'g', 't'>,
+ &ARBDecompiler::ThreadMask<'l', 'e'>,
+ &ARBDecompiler::ThreadMask<'l', 't'>,
+ &ARBDecompiler::ShuffleIndexed,
+
+ &ARBDecompiler::Barrier,
+ &ARBDecompiler::MemoryBarrierGroup,
+ &ARBDecompiler::MemoryBarrierGlobal,
+ };
+};
+
+ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
+ ShaderType stage, std::string_view identifier)
+ : device{device}, ir{ir}, registry{registry}, stage{stage} {
+ DefineGlobalMemory();
+
+ AddLine("TEMP RC;");
+ AddLine("TEMP FSWZA[4];");
+ AddLine("TEMP FSWZB[4];");
+ if (ir.IsDecompiled()) {
+ DecompileAST();
+ } else {
+ DecompileBranchMode();
+ }
+ AddLine("END");
+
+ const std::string code = std::move(shader_source);
+ DeclareHeader();
+ DeclareVertex();
+ DeclareGeometry();
+ DeclareFragment();
+ DeclareCompute();
+ DeclareInputAttributes();
+ DeclareOutputAttributes();
+ DeclareLocalMemory();
+ DeclareGlobalMemory();
+ DeclareConstantBuffers();
+ DeclareRegisters();
+ DeclareTemporaries();
+ DeclarePredicates();
+ DeclareInternalFlags();
+
+ shader_source += code;
+}
+
+std::string_view HeaderStageName(ShaderType stage) {
+ switch (stage) {
+ case ShaderType::Vertex:
+ return "vp";
+ case ShaderType::Geometry:
+ return "gp";
+ case ShaderType::Fragment:
+ return "fp";
+ case ShaderType::Compute:
+ return "cp";
+ default:
+ UNREACHABLE();
+ return "";
+ }
+}
+
+void ARBDecompiler::DefineGlobalMemory() {
+ u32 binding = 0;
+ for (const auto& pair : ir.GetGlobalMemory()) {
+ const GlobalMemoryBase base = pair.first;
+ global_memory_names.emplace(base, binding);
+ ++binding;
+ }
+}
+
+void ARBDecompiler::DeclareHeader() {
+ AddLine("!!NV{}5.0", HeaderStageName(stage));
+ // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
+ AddLine("OPTION NV_internal;");
+ AddLine("OPTION NV_gpu_program_fp64;");
+ AddLine("OPTION NV_shader_thread_group;");
+ if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
+ AddLine("OPTION NV_shader_thread_shuffle;");
+ }
+ if (stage == ShaderType::Vertex) {
+ if (device.HasNvViewportArray2()) {
+ AddLine("OPTION NV_viewport_array2;");
+ }
+ }
+ if (stage == ShaderType::Fragment) {
+ AddLine("OPTION ARB_draw_buffers;");
+ }
+ if (device.HasImageLoadFormatted()) {
+ AddLine("OPTION EXT_shader_image_load_formatted;");
+ }
+}
+
+void ARBDecompiler::DeclareVertex() {
+ if (stage != ShaderType::Vertex) {
+ return;
+ }
+ AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
+}
+
+void ARBDecompiler::DeclareGeometry() {
+ if (stage != ShaderType::Geometry) {
+ return;
+ }
+ const auto& info = registry.GetGraphicsInfo();
+ const auto& header = ir.GetHeader();
+ AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
+ AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
+ AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
+ AddLine("ATTRIB vertex_position = vertex.position;");
+}
+
+void ARBDecompiler::DeclareFragment() {
+ if (stage != ShaderType::Fragment) {
+ return;
+ }
+ AddLine("OUTPUT result_color7 = result.color[7];");
+ AddLine("OUTPUT result_color6 = result.color[6];");
+ AddLine("OUTPUT result_color5 = result.color[5];");
+ AddLine("OUTPUT result_color4 = result.color[4];");
+ AddLine("OUTPUT result_color3 = result.color[3];");
+ AddLine("OUTPUT result_color2 = result.color[2];");
+ AddLine("OUTPUT result_color1 = result.color[1];");
+ AddLine("OUTPUT result_color0 = result.color;");
+}
+
+void ARBDecompiler::DeclareCompute() {
+ if (stage != ShaderType::Compute) {
+ return;
+ }
+ const ComputeInfo& info = registry.GetComputeInfo();
+ AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
+ info.workgroup_size[2]);
+ if (info.shared_memory_size_in_words == 0) {
+ return;
+ }
+ const u32 limit = device.GetMaxComputeSharedMemorySize();
+ u32 size_in_bytes = info.shared_memory_size_in_words * 4;
+ if (size_in_bytes > limit) {
+ LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
+ size_in_bytes, limit);
+ size_in_bytes = limit;
+ }
+
+ AddLine("SHARED_MEMORY {};", size_in_bytes);
+ AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
+}
+
+void ARBDecompiler::DeclareInputAttributes() {
+ if (stage == ShaderType::Compute) {
+ return;
+ }
+ const std::string_view stage_name = StageInputName(stage);
+ for (const auto attribute : ir.GetInputAttributes()) {
+ if (!IsGenericAttribute(attribute)) {
+ continue;
+ }
+ const u32 index = GetGenericAttributeIndex(attribute);
+
+ std::string_view suffix;
+ if (stage == ShaderType::Fragment) {
+ const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
+ if (input_mode == PixelImap::Unused) {
+ return;
+ }
+ suffix = GetInputFlags(input_mode);
+ }
+ AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
+ index);
+ }
+}
+
+void ARBDecompiler::DeclareOutputAttributes() {
+ if (stage == ShaderType::Compute) {
+ return;
+ }
+ for (const auto attribute : ir.GetOutputAttributes()) {
+ if (!IsGenericAttribute(attribute)) {
+ continue;
+ }
+ const u32 index = GetGenericAttributeIndex(attribute);
+ AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
+ }
+}
+
+void ARBDecompiler::DeclareLocalMemory() {
+ u64 size = 0;
+ if (stage == ShaderType::Compute) {
+ size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
+ } else {
+ size = ir.GetHeader().GetLocalMemorySize();
+ }
+ if (size == 0) {
+ return;
+ }
+ const u64 element_count = Common::AlignUp(size, 4) / 4;
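+ // e.g. a 10-byte region is aligned up to 12 bytes and declared as lmem[3] (three 4-byte elements).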
+ AddLine("TEMP lmem[{}];", element_count);
+}
+
+void ARBDecompiler::DeclareGlobalMemory() {
+ const std::size_t num_entries = ir.GetGlobalMemory().size();
+ if (num_entries > 0) {
+ const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2;
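+ // Two entries fit per 4-component PARAM vector, presumably one 64-bit base pointer per .xy/.zw pair.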
+ AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1);
+ }
+}
+
+void ARBDecompiler::DeclareConstantBuffers() {
+ u32 binding = 0;
+ for (const auto& cbuf : ir.GetConstantBuffers()) {
+ AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
+ ++binding;
+ }
+}
+
+void ARBDecompiler::DeclareRegisters() {
+ for (const u32 gpr : ir.GetRegisters()) {
+ AddLine("TEMP R{};", gpr);
+ }
+}
+
+void ARBDecompiler::DeclareTemporaries() {
+ for (std::size_t i = 0; i < max_temporaries; ++i) {
+ AddLine("TEMP T{};", i);
+ }
+ for (std::size_t i = 0; i < max_long_temporaries; ++i) {
+ AddLine("LONG TEMP L{};", i);
+ }
+}
+
+void ARBDecompiler::DeclarePredicates() {
+ for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
+ AddLine("TEMP P{};", static_cast<u64>(pred));
+ }
+}
+
+void ARBDecompiler::DeclareInternalFlags() {
+ for (const char* name : INTERNAL_FLAG_NAMES) {
+ AddLine("TEMP {};", name);
+ }
+}
+
+void ARBDecompiler::InitializeVariables() {
+ AddLine("MOV.F32 FSWZA[0], -1;");
+ AddLine("MOV.F32 FSWZA[1], 1;");
+ AddLine("MOV.F32 FSWZA[2], -1;");
+ AddLine("MOV.F32 FSWZA[3], 0;");
+ AddLine("MOV.F32 FSWZB[0], -1;");
+ AddLine("MOV.F32 FSWZB[1], -1;");
+ AddLine("MOV.F32 FSWZB[2], 1;");
+ AddLine("MOV.F32 FSWZB[3], -1;");
+
+ if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
+ AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
+ }
+ for (const auto attribute : ir.GetOutputAttributes()) {
+ if (!IsGenericAttribute(attribute)) {
+ continue;
+ }
+ const u32 index = GetGenericAttributeIndex(attribute);
+ AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
+ }
+ for (const u32 gpr : ir.GetRegisters()) {
+ AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
+ }
+ for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
+ AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
+ }
+}
+
+void ARBDecompiler::DecompileAST() {
+ const u32 num_flow_variables = ir.GetASTNumVariables();
+ for (u32 i = 0; i < num_flow_variables; ++i) {
+ AddLine("TEMP F{};", i);
+ }
+ for (u32 i = 0; i < num_flow_variables; ++i) {
+ AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
+ }
+
+ InitializeVariables();
+
+ VisitAST(ir.GetASTProgram());
+}
+
+void ARBDecompiler::DecompileBranchMode() {
+ static constexpr u32 FLOW_STACK_SIZE = 20;
+ if (!ir.IsFlowStackDisabled()) {
+ AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
+ AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
+ AddLine("TEMP SSY_TOP;");
+ AddLine("TEMP PBK_TOP;");
+ }
+
+ AddLine("TEMP PC;");
+
+ if (!ir.IsFlowStackDisabled()) {
+ AddLine("MOV.U SSY_TOP.x, 0;");
+ AddLine("MOV.U PBK_TOP.x, 0;");
+ }
+
+ InitializeVariables();
+
+ const auto basic_block_end = ir.GetBasicBlocks().end();
+ auto basic_block_it = ir.GetBasicBlocks().begin();
+ const u32 first_address = basic_block_it->first;
+ AddLine("MOV.U PC.x, {};", first_address);
+
+ AddLine("REP;");
+
+ std::size_t num_blocks = 0;
+ while (basic_block_it != basic_block_end) {
+ const auto& [address, bb] = *basic_block_it;
+ ++num_blocks;
+
+ AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
+ AddLine("IF NE.x;");
+
+ VisitBlock(bb);
+
+ ++basic_block_it;
+
+ if (basic_block_it != basic_block_end) {
+ const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
+ if (!op || op->GetCode() != OperationCode::Branch) {
+ const u32 next_address = basic_block_it->first;
+ AddLine("MOV.U PC.x, {};", next_address);
+ AddLine("CONT;");
+ }
+ }
+
+ AddLine("ELSE;");
+ }
+ AddLine("RET;");
+ while (num_blocks--) {
+ AddLine("ENDIF;");
+ }
+
+ AddLine("ENDREP;");
+}
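+
+// Roughly, for two basic blocks at addresses 0 and 8 the emitted dispatch loop is shaped like
+// this sketch (fallthrough blocks additionally update PC and CONT before their ELSE):
+//   MOV.U PC.x, 0;
+//   REP;
+//   SEQ.S.CC RC.x, PC.x, 0;
+//   IF NE.x;  <block 0>  ELSE;
+//   SEQ.S.CC RC.x, PC.x, 8;
+//   IF NE.x;  <block 8>  ELSE;
+//   RET;
+//   ENDIF; ENDIF;
+//   ENDREP;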
+
+void ARBDecompiler::VisitAST(const ASTNode& node) {
+ if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
+ for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+ VisitAST(current);
+ }
+ } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
+ const std::string condition = VisitExpression(ast->condition);
+ ResetTemporaries();
+
+ AddLine("MOVC.U RC.x, {};", condition);
+ AddLine("IF NE.x;");
+ for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+ VisitAST(current);
+ }
+ AddLine("ENDIF;");
+ } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
+ AddLine("ELSE;");
+ for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+ VisitAST(current);
+ }
+ } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
+ VisitBlock(ast->nodes);
+ } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
+ AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition));
+ ResetTemporaries();
+ } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
+ const std::string condition = VisitExpression(ast->condition);
+ ResetTemporaries();
+ AddLine("REP;");
+ for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+ VisitAST(current);
+ }
+ AddLine("MOVC.U RC.x, {};", condition);
+ AddLine("BRK (NE.x);");
+ AddLine("ENDREP;");
+ } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) {
+ const bool is_true = ExprIsTrue(ast->condition);
+ if (!is_true) {
+ AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
+ AddLine("IF NE.x;");
+ ResetTemporaries();
+ }
+ if (ast->kills) {
+ AddLine("KIL TR;");
+ } else {
+ Exit();
+ }
+ if (!is_true) {
+ AddLine("ENDIF;");
+ }
+ } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) {
+ if (ExprIsTrue(ast->condition)) {
+ AddLine("BRK;");
+ } else {
+ AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
+ AddLine("BRK (NE.x);");
+ ResetTemporaries();
+ }
+ } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
+ // Nothing to do
+ } else {
+ UNREACHABLE();
+ }
+}
+
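+// Evaluates a control flow expression into a scalar ARB operand. Booleans follow the usual
+// GPU convention: all ones (0xffffffff) for true, zero for false.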
+std::string ARBDecompiler::VisitExpression(const Expr& node) {
+ if (const auto expr = std::get_if<ExprAnd>(&*node)) {
+ std::string result = AllocTemporary();
+ AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
+ VisitExpression(expr->operand2));
+ return result;
+ }
+ if (const auto expr = std::get_if<ExprOr>(&*node)) {
+ std::string result = AllocTemporary();
+ AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
+ VisitExpression(expr->operand2));
+ return result;
+ }
+ if (const auto expr = std::get_if<ExprNot>(&*node)) {
+ std::string result = AllocTemporary();
+ AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
+ return result;
+ }
+ if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
+ return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
+ }
+ if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
+ return Visit(ir.GetConditionCode(expr->cc));
+ }
+ if (const auto expr = std::get_if<ExprVar>(&*node)) {
+ return fmt::format("F{}.x", expr->var_index);
+ }
+ if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
+ return expr->value ? "0xffffffff" : "0";
+ }
+ if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
+ std::string result = AllocTemporary();
+ AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
+ return result;
+ }
+ UNREACHABLE();
+ return "0";
+}
+
+void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
+ for (const auto& node : bb) {
+ Visit(node);
+ }
+}
+
+std::string ARBDecompiler::Visit(const Node& node) {
+ if (const auto operation = std::get_if<OperationNode>(&*node)) {
+ if (const auto amend_index = operation->GetAmendIndex()) {
+ Visit(ir.GetAmendNode(*amend_index));
+ }
+ const std::size_t index = static_cast<std::size_t>(operation->GetCode());
+ if (index >= OPERATION_DECOMPILERS.size()) {
+ UNREACHABLE_MSG("Out of bounds operation: {}", index);
+ return {};
+ }
+ const auto decompiler = OPERATION_DECOMPILERS[index];
+ if (decompiler == nullptr) {
+ UNREACHABLE_MSG("Undefined operation: {}", index);
+ return {};
+ }
+ return (this->*decompiler)(*operation);
+ }
+
+ if (const auto gpr = std::get_if<GprNode>(&*node)) {
+ const u32 index = gpr->GetIndex();
+ if (index == Register::ZeroIndex) {
+ return "{0, 0, 0, 0}.x";
+ }
+ return fmt::format("R{}.x", index);
+ }
+
+ if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
+ return fmt::format("CV{}.x", cv->GetIndex());
+ }
+
+ if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
+ std::string temporary = AllocTemporary();
+ AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
+ return temporary;
+ }
+
+ if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
+ std::string temporary = AllocTemporary();
+ switch (const auto index = predicate->GetIndex(); index) {
+ case Tegra::Shader::Pred::UnusedIndex:
+ AddLine("MOV.S {}, -1;", temporary);
+ break;
+ case Tegra::Shader::Pred::NeverExecute:
+ AddLine("MOV.S {}, 0;", temporary);
+ break;
+ default:
+ AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
+ break;
+ }
+ if (predicate->IsNegated()) {
+ AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
+ }
+ return temporary;
+ }
+
+ if (const auto abuf = std::get_if<AbufNode>(&*node)) {
+ if (abuf->IsPhysicalBuffer()) {
+ UNIMPLEMENTED_MSG("Physical buffers are not implemented");
+ return "{0, 0, 0, 0}.x";
+ }
+
+ const Attribute::Index index = abuf->GetIndex();
+ const u32 element = abuf->GetElement();
+ const char swizzle = Swizzle(element);
+ switch (index) {
+ case Attribute::Index::Position: {
+ if (stage == ShaderType::Geometry) {
+ return fmt::format("{}_position[{}].{}", StageInputName(stage),
+ Visit(abuf->GetBuffer()), swizzle);
+ } else {
+ return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
+ }
+ }
+ case Attribute::Index::TessCoordInstanceIDVertexID:
+ ASSERT(stage == ShaderType::Vertex);
+ switch (element) {
+ case 2:
+ return "vertex.instance";
+ case 3:
+ return "vertex.id";
+ }
+ UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
+ break;
+ case Attribute::Index::PointCoord:
+ switch (element) {
+ case 0:
+ return "fragment.pointcoord.x";
+ case 1:
+ return "fragment.pointcoord.y";
+ }
+ UNIMPLEMENTED();
+ break;
+ case Attribute::Index::FrontFacing: {
+ ASSERT(stage == ShaderType::Fragment);
+ ASSERT(element == 3);
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
+ AddLine("MOV.U.CC RC.x, -RC;");
+ AddLine("MOV.S {}.x, 0;", temporary);
+ AddLine("MOV.S {}.x (NE.x), -1;", temporary);
+ return fmt::format("{}.x", temporary);
+ }
+ default:
+ if (IsGenericAttribute(index)) {
+ if (stage == ShaderType::Geometry) {
+ return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
+ Visit(abuf->GetBuffer()), swizzle);
+ } else {
+ return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
+ GetGenericAttributeIndex(index), swizzle);
+ }
+ }
+ UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index));
+ break;
+ }
+ return "{0, 0, 0, 0}.x";
+ }
+
+ if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
+ std::string offset_string;
+ const auto& offset = cbuf->GetOffset();
+ if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
+ offset_string = std::to_string(imm->GetValue());
+ } else {
+ offset_string = Visit(offset);
+ }
+ std::string temporary = AllocTemporary();
+ AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
+ return temporary;
+ }
+
+ if (const auto gmem = std::get_if<GmemNode>(&*node)) {
+ std::string temporary = AllocTemporary();
+ AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem));
+ return temporary;
+ }
+
+ if (const auto lmem = std::get_if<LmemNode>(&*node)) {
+ std::string temporary = Visit(lmem->GetAddress());
+ AddLine("SHR.U {}, {}, 2;", temporary, temporary);
+ AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
+ return temporary;
+ }
+
+ if (const auto smem = std::get_if<SmemNode>(&*node)) {
+ std::string temporary = Visit(smem->GetAddress());
+ AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
+ return temporary;
+ }
+
+ if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
+ const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
+ return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
+ }
+
+ if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+ if (const auto amend_index = conditional->GetAmendIndex()) {
+ Visit(ir.GetAmendNode(*amend_index));
+ }
+ AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
+ AddLine("IF NE.x;");
+ VisitBlock(conditional->GetCode());
+ AddLine("ENDIF;");
+ return {};
+ }
+
+ if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
+        // Uncommenting this would generate invalid code: GLASM does not accept "//" comments.
+ // AddLine("// {}", cmt->GetText());
+ return {};
+ }
+
+ UNIMPLEMENTED();
+ return {};
+}
+
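+// Packs the texture coordinates into a single vector temporary, appending the array index
+// and the depth compare reference when the sampler requires them. Returns the temporary and
+// the number of components written.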
+std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ UNIMPLEMENTED_IF(meta.sampler.is_indexed);
+ UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array &&
+ meta.sampler.type == Tegra::Shader::TextureType::TextureCube);
+
+ const std::size_t count = operation.GetOperandsCount();
+ std::string temporary = AllocVectorTemporary();
+ std::size_t i = 0;
+ for (; i < count; ++i) {
+ AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
+ }
+ if (meta.sampler.is_array) {
+ AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array));
+ }
+ if (meta.sampler.is_shadow) {
+ AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare));
+ }
+ return {std::move(temporary), i};
+}
+
+std::string ARBDecompiler::BuildAoffi(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ if (meta.aoffi.empty()) {
+ return {};
+ }
+ const std::string temporary = AllocVectorTemporary();
+ std::size_t i = 0;
+ for (auto& node : meta.aoffi) {
+ AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
+ }
+ return fmt::format(", offset({})", temporary);
+}
+
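+// Builds a 64-bit pointer for a global memory access. Each binding stores its base pointer
+// packed in a constant buffer vector (two 64-bit pointers per c[] entry, hence the x/y
+// swizzle); the 32-bit offset from the guest address is widened and added on top.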
+std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
+ const u32 binding = global_memory_names.at(gmem.GetDescriptor());
+ const char result_swizzle = binding % 2 == 0 ? 'x' : 'y';
+
+ const std::string pointer = AllocLongVectorTemporary();
+ std::string temporary = AllocTemporary();
+
+ const u32 local_index = binding / 2;
+ AddLine("PK64.U {}, c[{}];", pointer, local_index);
+ AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
+ Visit(gmem.GetBaseAddress()));
+ AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
+ AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer);
+ return fmt::format("{}.x", pointer);
+}
+
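+// Writes the fragment outputs on exit. Color registers are consumed consecutively, skipping
+// components that the pixel shader header masks out.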
+void ARBDecompiler::Exit() {
+ if (stage != ShaderType::Fragment) {
+ AddLine("RET;");
+ return;
+ }
+
+ const auto safe_get_register = [this](u32 reg) -> std::string {
+ // TODO(Rodrigo): Replace with contains once C++20 releases
+ const auto& used_registers = ir.GetRegisters();
+ if (used_registers.find(reg) != used_registers.end()) {
+ return fmt::format("R{}.x", reg);
+ }
+ return "{0, 0, 0, 0}.x";
+ };
+
+ const auto& header = ir.GetHeader();
+ u32 current_reg = 0;
+ for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
+ for (u32 component = 0; component < 4; ++component) {
+ if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
+ continue;
+ }
+ AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
+ safe_get_register(current_reg));
+ ++current_reg;
+ }
+ }
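+    // The depth output is located two registers after the last color output, which the +1 on
+    // top of current_reg accounts for.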
+ if (header.ps.omap.depth) {
+ AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
+ }
+
+ AddLine("RET;");
+}
+
+std::string ARBDecompiler::Assign(Operation operation) {
+ const Node& dest = operation[0];
+ const Node& src = operation[1];
+
+ std::string dest_name;
+ if (const auto gpr = std::get_if<GprNode>(&*dest)) {
+ if (gpr->GetIndex() == Register::ZeroIndex) {
+            // Writing to Register::ZeroIndex is a no-op
+ return {};
+ }
+ dest_name = fmt::format("R{}.x", gpr->GetIndex());
+ } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
+ const u32 element = abuf->GetElement();
+ const char swizzle = Swizzle(element);
+ switch (const Attribute::Index index = abuf->GetIndex()) {
+ case Attribute::Index::Position:
+ dest_name = fmt::format("result.position.{}", swizzle);
+ break;
+ case Attribute::Index::LayerViewportPointSize:
+ switch (element) {
+ case 0:
+ UNIMPLEMENTED();
+ return {};
+ case 1:
+ case 2:
+ if (!device.HasNvViewportArray2()) {
+ LOG_ERROR(
+ Render_OpenGL,
+ "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
+ return {};
+ }
+ dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
+ break;
+ case 3:
+ dest_name = "result.pointsize.x";
+ break;
+ }
+ break;
+ case Attribute::Index::ClipDistances0123:
+ dest_name = fmt::format("result.clip[{}].x", element);
+ break;
+ case Attribute::Index::ClipDistances4567:
+ dest_name = fmt::format("result.clip[{}].x", element + 4);
+ break;
+ default:
+ if (!IsGenericAttribute(index)) {
+ UNREACHABLE();
+ return {};
+ }
+ dest_name =
+ fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
+ break;
+ }
+ } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
+ const std::string address = Visit(lmem->GetAddress());
+ AddLine("SHR.U {}, {}, 2;", address, address);
+ dest_name = fmt::format("lmem[{}].x", address);
+ } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
+ AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
+ ResetTemporaries();
+ return {};
+ } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
+ AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
+ ResetTemporaries();
+ return {};
+ } else {
+ UNREACHABLE();
+ ResetTemporaries();
+ return {};
+ }
+
+ AddLine("MOV.U {}, {};", dest_name, Visit(src));
+ ResetTemporaries();
+ return {};
+}
+
+std::string ARBDecompiler::Select(Operation operation) {
+ std::string temporary = AllocTemporary();
+ AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
+ Visit(operation[2]));
+ return temporary;
+}
+
+std::string ARBDecompiler::FClamp(Operation operation) {
+ // 1.0f in hex, replace with std::bit_cast on C++20
+ static constexpr u32 POSITIVE_ONE = 0x3f800000;
+
+ std::string temporary = AllocTemporary();
+ const Node& value = operation[0];
+ const Node& low = operation[1];
+ const Node& high = operation[2];
+ const auto* const imm_low = std::get_if<ImmediateNode>(&*low);
+ const auto* const imm_high = std::get_if<ImmediateNode>(&*high);
+ if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
+ AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
+ } else {
+ AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
+ AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
+ }
+ return temporary;
+}
+
+std::string ARBDecompiler::FCastHalf0(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::FCastHalf1(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
+ AddLine("MOV {}.x, {}.y;", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::FSqrt(Operation operation) {
+ std::string temporary = AllocTemporary();
+ AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
+ AddLine("RCP.F32 {}, {};", temporary, temporary);
+ return temporary;
+}
+
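+// FSWZADD: selects a 2-bit field of the swizzle mask operand based on the thread id within
+// the quad, then uses it to index the FSWZA/FSWZB sign tables applied to each operand.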
+std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ if (!device.HasWarpIntrinsics()) {
+ LOG_ERROR(Render_OpenGL,
+ "NV_shader_thread_shuffle is missing. Kepler or better is required.");
+ AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
+ return fmt::format("{}.x", temporary);
+ }
+
+ AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
+ AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
+ AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
+ AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary);
+ AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary);
+ AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary);
+ AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
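+// The f16x2 helpers below share a pattern: UP2H unpacks a register holding two packed halves
+// into two f32 components, the operation runs at .F16 precision, and PK2H repacks the result.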
+std::string ARBDecompiler::HAdd2(Operation operation) {
+ const std::string tmp1 = AllocVectorTemporary();
+ const std::string tmp2 = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
+ AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
+ AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
+ AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
+ return fmt::format("{}.x", tmp1);
+}
+
+std::string ARBDecompiler::HMul2(Operation operation) {
+ const std::string tmp1 = AllocVectorTemporary();
+ const std::string tmp2 = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
+ AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
+ AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
+ AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
+ return fmt::format("{}.x", tmp1);
+}
+
+std::string ARBDecompiler::HFma2(Operation operation) {
+ const std::string tmp1 = AllocVectorTemporary();
+ const std::string tmp2 = AllocVectorTemporary();
+ const std::string tmp3 = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
+ AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
+ AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
+ AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
+ AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
+ return fmt::format("{}.x", tmp1);
+}
+
+std::string ARBDecompiler::HAbsolute(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
+ AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::HNegate(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
+ AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
+ AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
+ AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
+ AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::HClamp(Operation operation) {
+ const std::string tmp1 = AllocVectorTemporary();
+ const std::string tmp2 = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
+ AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
+ AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
+ AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
+ AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
+ AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
+ AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
+ AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
+ return fmt::format("{}.x", tmp1);
+}
+
+std::string ARBDecompiler::HCastFloat(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
+ AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::HUnpack(Operation operation) {
+ std::string operand = Visit(operation[0]);
+ switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
+ case Tegra::Shader::HalfType::H0_H1:
+ return operand;
+ case Tegra::Shader::HalfType::F32: {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("MOV.U {}.x, {};", temporary, operand);
+ AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+ }
+ case Tegra::Shader::HalfType::H0_H0: {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", temporary, operand);
+ AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+ }
+ case Tegra::Shader::HalfType::H1_H1: {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", temporary, operand);
+ AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+ }
+ }
+ UNREACHABLE();
+ return "{0, 0, 0, 0}.x";
+}
+
+std::string ARBDecompiler::HMergeF32(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::HMergeH0(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
+ AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
+ AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::HMergeH1(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
+ AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
+ AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::HPack2(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
+ AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::LogicalAssign(Operation operation) {
+ const Node& dest = operation[0];
+ const Node& src = operation[1];
+
+ std::string target;
+
+ if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
+ ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
+
+ const Tegra::Shader::Pred index = pred->GetIndex();
+ switch (index) {
+ case Tegra::Shader::Pred::NeverExecute:
+ case Tegra::Shader::Pred::UnusedIndex:
+ // Writing to these predicates is a no-op
+ return {};
+ }
+ target = fmt::format("P{}.x", static_cast<u64>(index));
+ } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
+ const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
+ target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
+ } else {
+ UNREACHABLE();
+ ResetTemporaries();
+ return {};
+ }
+
+ AddLine("MOV.U {}, {};", target, Visit(src));
+ ResetTemporaries();
+ return {};
+}
+
+std::string ARBDecompiler::LogicalPick2(Operation operation) {
+ std::string temporary = AllocTemporary();
+ const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
+ AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
+ return temporary;
+}
+
+std::string ARBDecompiler::LogicalAnd2(Operation operation) {
+ std::string temporary = AllocTemporary();
+ const std::string op = Visit(operation[0]);
+ AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
+ return temporary;
+}
+
+std::string ARBDecompiler::FloatOrdered(Operation operation) {
+ std::string temporary = AllocTemporary();
+ AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
+ AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
+ AddLine("MOV.S {}, -1;", temporary);
+ AddLine("MOV.S {} (NAN.x), 0;", temporary);
+ AddLine("MOV.S {} (NAN.y), 0;", temporary);
+ return temporary;
+}
+
+std::string ARBDecompiler::FloatUnordered(Operation operation) {
+ std::string temporary = AllocTemporary();
+ AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
+ AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
+ AddLine("MOV.S {}, 0;", temporary);
+ AddLine("MOV.S {} (NAN.x), -1;", temporary);
+ AddLine("MOV.S {} (NAN.y), -1;", temporary);
+ return temporary;
+}
+
+std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
+ std::string temporary = AllocTemporary();
+ AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
+ AddLine("MOV.S {}, 0;", temporary);
+ AddLine("IF CF.x;");
+ AddLine("MOV.S {}, -1;", temporary);
+ AddLine("ENDIF;");
+ return temporary;
+}
+
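+// Regular texture sampling. When a coordinate slot is free, the bias (TXB) or explicit LOD
+// (TXL) rides in the .w component; otherwise it is passed as an extra scalar operand.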
+std::string ARBDecompiler::Texture(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+ const auto [temporary, swizzle] = BuildCoords(operation);
+
+ std::string_view opcode = "TEX";
+ std::string extra;
+ if (meta.bias) {
+ ASSERT(!meta.lod);
+ opcode = "TXB";
+
+ if (swizzle < 4) {
+ AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
+ } else {
+ const std::string bias = AllocTemporary();
+ AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
+ extra = fmt::format(" {},", bias);
+ }
+ }
+ if (meta.lod) {
+ ASSERT(!meta.bias);
+ opcode = "TXL";
+
+ if (swizzle < 4) {
+ AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
+ } else {
+ const std::string lod = AllocTemporary();
+ AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
+ extra = fmt::format(" {},", lod);
+ }
+ }
+
+ AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id,
+ TextureType(meta), BuildAoffi(operation));
+ AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::TextureGather(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+ const auto [temporary, swizzle] = BuildCoords(operation);
+
+ std::string comp;
+ if (!meta.sampler.is_shadow) {
+ const auto& immediate = std::get<ImmediateNode>(*meta.component);
+ comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
+ }
+
+ AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
+ TextureType(meta), BuildAoffi(operation));
+ AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ const std::string temporary = AllocVectorTemporary();
+ const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+
+ ASSERT(!meta.sampler.is_array);
+
+ const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
+ AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
+ AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
+ return fmt::format("{}.x", temporary);
+}
+
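+// LOD.F returns the computed level of detail as a float; the shader IR expects a fixed point
+// value, so it is scaled by 256 and truncated here.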
+std::string ARBDecompiler::TextureQueryLod(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ const std::string temporary = AllocVectorTemporary();
+ const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+
+ ASSERT(!meta.sampler.is_array);
+
+ const std::size_t count = operation.GetOperandsCount();
+ for (std::size_t i = 0; i < count; ++i) {
+ AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
+ }
+ AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
+ AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
+ AddLine("TRUNC.S {}, {};", temporary, temporary);
+ AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::TexelFetch(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+ const auto [temporary, swizzle] = BuildCoords(operation);
+
+ if (!meta.sampler.is_buffer) {
+ ASSERT(swizzle < 4);
+ AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
+ }
+ AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta),
+ BuildAoffi(operation));
+ AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::TextureGradient(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+ const std::string ddx = AllocVectorTemporary();
+ const std::string ddy = AllocVectorTemporary();
+ const std::string coord = BuildCoords(operation).first;
+
+ const std::size_t num_components = meta.derivates.size() / 2;
+ for (std::size_t index = 0; index < num_components; ++index) {
+ const char swizzle = Swizzle(index);
+ AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
+ AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
+ }
+
+ const std::string_view result = coord;
+ AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
+ TextureType(meta), BuildAoffi(operation));
+ AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
+ return fmt::format("{}.x", result);
+}
+
+std::string ARBDecompiler::ImageLoad(Operation operation) {
+ const auto& meta = std::get<MetaImage>(operation.GetMeta());
+ const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
+ const std::size_t count = operation.GetOperandsCount();
+ const std::string_view type = ImageType(meta.image.type);
+
+ const std::string temporary = AllocVectorTemporary();
+ for (std::size_t i = 0; i < count; ++i) {
+ AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
+ }
+ AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
+ AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::ImageStore(Operation operation) {
+ const auto& meta = std::get<MetaImage>(operation.GetMeta());
+ const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
+ const std::size_t num_coords = operation.GetOperandsCount();
+ const std::size_t num_values = meta.values.size();
+ const std::string_view type = ImageType(meta.image.type);
+
+ const std::string coord = AllocVectorTemporary();
+ const std::string value = AllocVectorTemporary();
+ for (std::size_t i = 0; i < num_coords; ++i) {
+ AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
+ }
+ for (std::size_t i = 0; i < num_values; ++i) {
+ AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
+ }
+ AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
+ return {};
+}
+
+std::string ARBDecompiler::Branch(Operation operation) {
+ const auto target = std::get<ImmediateNode>(*operation[0]);
+ AddLine("MOV.U PC.x, {};", target.GetValue());
+ AddLine("CONT;");
+ return {};
+}
+
+std::string ARBDecompiler::BranchIndirect(Operation operation) {
+ AddLine("MOV.U PC.x, {};", Visit(operation[0]));
+ AddLine("CONT;");
+ return {};
+}
+
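+// SSY/PBK targets live in software stacks: pushing stores the target address at the current
+// top and bumps the top, popping does the reverse and redirects PC to the popped target.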
+std::string ARBDecompiler::PushFlowStack(Operation operation) {
+ const auto stack = std::get<MetaStackClass>(operation.GetMeta());
+ const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
+ const std::string_view stack_name = StackName(stack);
+ AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
+ AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
+ return {};
+}
+
+std::string ARBDecompiler::PopFlowStack(Operation operation) {
+ const auto stack = std::get<MetaStackClass>(operation.GetMeta());
+ const std::string_view stack_name = StackName(stack);
+ AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
+ AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
+ AddLine("CONT;");
+ return {};
+}
+
+std::string ARBDecompiler::Exit(Operation) {
+ Exit();
+ return {};
+}
+
+std::string ARBDecompiler::Discard(Operation) {
+ AddLine("KIL TR;");
+ return {};
+}
+
+std::string ARBDecompiler::EmitVertex(Operation) {
+ AddLine("EMIT;");
+ return {};
+}
+
+std::string ARBDecompiler::EndPrimitive(Operation) {
+ AddLine("ENDPRIM;");
+ return {};
+}
+
+std::string ARBDecompiler::InvocationId(Operation) {
+ return "primitive.invocation";
+}
+
+std::string ARBDecompiler::YNegate(Operation) {
+ LOG_WARNING(Render_OpenGL, "(STUBBED)");
+ std::string temporary = AllocTemporary();
+ AddLine("MOV.F {}, 1;", temporary);
+ return temporary;
+}
+
+std::string ARBDecompiler::ThreadId(Operation) {
+ return fmt::format("{}.threadid", StageInputName(stage));
+}
+
+std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
+ if (!device.HasWarpIntrinsics()) {
+ LOG_ERROR(Render_OpenGL,
+ "NV_shader_thread_shuffle is missing. Kepler or better is required.");
+ return Visit(operation[0]);
+ }
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
+ Visit(operation[1]));
+ AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::Barrier(Operation) {
+ AddLine("BAR;");
+ return {};
+}
+
+std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
+ AddLine("MEMBAR.CTA;");
+ return {};
+}
+
+std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
+ AddLine("MEMBAR;");
+ return {};
+}
+
+} // Anonymous namespace
+
+std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
+ const VideoCommon::Shader::Registry& registry,
+ Tegra::Engines::ShaderType stage, std::string_view identifier) {
+ return ARBDecompiler(device, ir, registry, stage, identifier).Code();
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
new file mode 100644
index 000000000..6afc87220
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.h
@@ -0,0 +1,29 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include <string_view>
+
+#include "common/common_types.h"
+
+namespace Tegra::Engines {
+enum class ShaderType : u32;
+}
+
+namespace VideoCommon::Shader {
+class ShaderIR;
+class Registry;
+} // namespace VideoCommon::Shader
+
+namespace OpenGL {
+
+class Device;
+
+std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
+ const VideoCommon::Shader::Registry& registry,
+ Tegra::Engines::ShaderType stage, std::string_view identifier);
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 9964ea894..b1c4cd62f 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -22,22 +22,54 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
-CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
+Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
: VideoCommon::BufferBlock{cpu_addr, size} {
gl_buffer.Create();
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
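+    // Make the buffer resident and query its GPU virtual address (NV_shader_buffer_load) so
+    // assembly shaders and unified-memory vertex fetch can address it directly.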
+ if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
+ glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
+ glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
+ }
+}
+
+Buffer::~Buffer() = default;
+
+void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) {
+ glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
+ data);
+}
+
+void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
+ MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
+ const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
+ const GLintptr gl_offset = static_cast<GLintptr>(offset);
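+    // Downloads go through a lazily created GL_STREAM_READ staging copy of the buffer.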
+ if (read_buffer.handle == 0) {
+ read_buffer.Create();
+ glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
+ GL_STREAM_READ);
+ }
+ glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
+ glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
+ glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
}
-CachedBufferBlock::~CachedBufferBlock() = default;
+void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
+ std::size_t size) {
+ glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
+ static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
+}
-OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
- const Device& device, std::size_t stream_size)
- : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {
+OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
+ const Device& device_, std::size_t stream_size)
+ : GenericBufferCache{rasterizer, gpu_memory, cpu_memory,
+ std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
+ device{device_} {
if (!device.HasFastBufferSubData()) {
return;
}
- static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
+ static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
for (const GLuint cbuf : cbufs) {
glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
@@ -48,44 +80,21 @@ OGLBufferCache::~OGLBufferCache() {
glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
}
-Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
- return std::make_shared<CachedBufferBlock>(cpu_addr, size);
+std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
+ return std::make_shared<Buffer>(device, cpu_addr, size);
}
-GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
- return buffer->GetHandle();
-}
-
-GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
- return 0;
-}
-
-void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- const u8* data) {
- glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
- static_cast<GLsizeiptr>(size), data);
-}
-
-void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- u8* data) {
- MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
- glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
- glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
- static_cast<GLsizeiptr>(size), data);
-}
-
-void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
- std::size_t dst_offset, std::size_t size) {
- glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset),
- static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
+OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
+ return {0, 0, 0};
}
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
std::size_t size) {
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
- const GLuint& cbuf = cbufs[cbuf_cursor++];
+ const GLuint cbuf = cbufs[cbuf_cursor++];
+
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
- return {cbuf, 0};
+ return {cbuf, 0, 0};
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a9e86cfc7..f75b32e31 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -10,7 +10,6 @@
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
@@ -24,57 +23,59 @@ class Device;
class OGLStreamBuffer;
class RasterizerOpenGL;
-class CachedBufferBlock;
+class Buffer : public VideoCommon::BufferBlock {
+public:
+ explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
+ ~Buffer();
-using Buffer = std::shared_ptr<CachedBufferBlock>;
-using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
+ void Upload(std::size_t offset, std::size_t size, const u8* data);
-class CachedBufferBlock : public VideoCommon::BufferBlock {
-public:
- explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
- ~CachedBufferBlock();
+ void Download(std::size_t offset, std::size_t size, u8* data);
+
+ void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
+ std::size_t size);
- GLuint GetHandle() const {
+ GLuint Handle() const noexcept {
return gl_buffer.handle;
}
+ u64 Address() const noexcept {
+ return gpu_address;
+ }
+
private:
OGLBuffer gl_buffer;
+ OGLBuffer read_buffer;
+ u64 gpu_address = 0;
};
+using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
class OGLBufferCache final : public GenericBufferCache {
public:
- explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
+ explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const Device& device, std::size_t stream_size);
~OGLBufferCache();
- GLuint GetEmptyBuffer(std::size_t) override;
+ BufferInfo GetEmptyBuffer(std::size_t) override;
void Acquire() noexcept {
cbuf_cursor = 0;
}
protected:
- Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
-
- GLuint ToHandle(const Buffer& buffer) override;
-
- void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- const u8* data) override;
-
- void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- u8* data) override;
-
- void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
- std::size_t dst_offset, std::size_t size) override;
+ std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
private:
+ static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
+ Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
+
+ const Device& device;
+
std::size_t cbuf_cursor = 0;
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
- Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
- cbufs;
+ std::array<GLuint, NUM_CBUFS> cbufs{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index b772c37d9..e7d95149f 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -123,16 +123,24 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
u32 base_images = 0;
- // Reserve more image bindings on fragment and vertex stages.
+    // GL_MAX_IMAGE_UNITS is guaranteed by the spec to be at least 8. So far games have been
+    // observed to use 1 image binding on the vertex stage and 4 on the fragment stage.
+
+ // Reserve at least 4 image bindings on the fragment stage.
bindings[4].image =
- Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]);
- bindings[0].image =
- Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]);
+ Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
+
+ // This is guaranteed to be at least 1.
+ const u32 total_extracted_images = num_images / (NumStages - 1);
// Reserve the other image bindings.
- const u32 total_extracted_images = num_images / (NumStages - 2);
- for (std::size_t i = 2; i < NumStages; ++i) {
+ for (std::size_t i = 0; i < NumStages; ++i) {
const std::size_t stage = stage_swizzle[i];
+ if (stage == 4) {
+ continue;
+ }
bindings[stage].image =
Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
}
@@ -170,7 +178,7 @@ bool IsASTCSupported() {
for (const GLenum format : formats) {
for (const GLenum support : required_support) {
GLint value;
- glGetInternalformativ(GL_TEXTURE_2D, format, support, 1, &value);
+ glGetInternalformativ(target, format, support, 1, &value);
if (value != GL_FULL_SUPPORT) {
return false;
}
@@ -185,34 +193,54 @@ bool IsASTCSupported() {
Device::Device()
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
- const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
+ const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
+ const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();
const bool is_nvidia = vendor == "NVIDIA Corporation";
const bool is_amd = vendor == "ATI Technologies Inc.";
+ bool disable_fast_buffer_sub_data = false;
+ if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
+        LOG_WARNING(
+            Render_OpenGL,
+            "Beta driver 443.24 is known to have issues; performance may be degraded.");
+ disable_fast_buffer_sub_data = true;
+ }
+
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
+ max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
GLAD_GL_NV_shader_thread_shuffle;
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
+ has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");
has_astc = IsASTCSupported();
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
- has_fast_buffer_sub_data = is_nvidia;
- use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
- GLAD_GL_NV_compute_program5;
+ has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
+ has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
+
+    // At the time of writing, only Nvidia's driver optimizes BufferSubData on exclusive
+    // uniform buffers as "push constants".
+ has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
+
+ use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() &&
+ GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
+ GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
+
+ use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
- if (Settings::values.use_assembly_shaders && !use_assembly_shaders) {
+ if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) {
LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
}
}
@@ -223,10 +251,12 @@ Device::Device(std::nullptr_t) {
shader_storage_alignment = 4;
max_vertex_attributes = 16;
max_varyings = 15;
+ max_compute_shared_memory_size = 0x10000;
has_warp_intrinsics = true;
has_shader_ballot = true;
has_vertex_viewport_layer = true;
has_image_load_formatted = true;
+ has_texture_shadow_lod = true;
has_variable_aoffi = true;
}
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 98cca0254..8a4b6b9fc 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -52,6 +52,10 @@ public:
return max_varyings;
}
+ u32 GetMaxComputeSharedMemorySize() const {
+ return max_compute_shared_memory_size;
+ }
+
bool HasWarpIntrinsics() const {
return has_warp_intrinsics;
}
@@ -68,6 +72,14 @@ public:
return has_image_load_formatted;
}
+ bool HasTextureShadowLod() const {
+ return has_texture_shadow_lod;
+ }
+
+ bool HasVertexBufferUnifiedMemory() const {
+ return has_vertex_buffer_unified_memory;
+ }
+
bool HasASTC() const {
return has_astc;
}
@@ -88,10 +100,18 @@ public:
return has_fast_buffer_sub_data;
}
+ bool HasNvViewportArray2() const {
+ return has_nv_viewport_array2;
+ }
+
bool UseAssemblyShaders() const {
return use_assembly_shaders;
}
+ bool UseAsynchronousShaders() const {
+ return use_asynchronous_shaders;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -102,16 +122,21 @@ private:
std::size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
+ u32 max_compute_shared_memory_size{};
bool has_warp_intrinsics{};
bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
bool has_image_load_formatted{};
+ bool has_texture_shadow_lod{};
+ bool has_vertex_buffer_unified_memory{};
bool has_astc{};
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
bool has_fast_buffer_sub_data{};
+ bool has_nv_viewport_array2{};
bool use_assembly_shaders{};
+ bool use_asynchronous_shaders{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index ec5421afa..b532fdcc2 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -4,16 +4,17 @@
#include "common/assert.h"
+#include <glad/glad.h>
+
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
namespace OpenGL {
-GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed)
- : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {}
+GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) : FenceBase(payload, is_stubbed) {}
GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
- : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {}
+ : FenceBase(address, payload, is_stubbed) {}
GLInnerFence::~GLInnerFence() = default;
@@ -44,11 +45,10 @@ void GLInnerFence::Wait() {
glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
}
-FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system,
- VideoCore::RasterizerInterface& rasterizer,
+FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
TextureCacheOpenGL& texture_cache,
OGLBufferCache& buffer_cache, QueryCache& query_cache)
- : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {}
+ : GenericFenceManager{rasterizer, gpu, texture_cache, buffer_cache, query_cache} {}
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(value, is_stubbed);
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index c917b3343..da1dcdace 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -5,7 +5,6 @@
#pragma once
#include <memory>
-#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/fence_manager.h"
@@ -38,9 +37,9 @@ using GenericFenceManager =
class FenceManagerOpenGL final : public GenericFenceManager {
public:
- FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
- QueryCache& query_cache);
+ explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
+ QueryCache& query_cache);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index d7ba57aca..1a3d9720e 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -30,12 +30,11 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
} // Anonymous namespace
-QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
- : VideoCommon::QueryCacheBase<
- QueryCache, CachedQuery, CounterStream, HostCounter,
- std::vector<OGLQuery>>{system,
- static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)},
- gl_rasterizer{gl_rasterizer} {}
+QueryCache::QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::MemoryManager& gpu_memory)
+ : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter>(
+ rasterizer, maxwell3d, gpu_memory),
+ gl_rasterizer{rasterizer} {}
QueryCache::~QueryCache() = default;
@@ -90,6 +89,8 @@ u64 HostCounter::BlockingQuery() const {
CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
: VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
+CachedQuery::~CachedQuery() = default;
+
CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
: VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index d8e7052a1..82cac51ee 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -26,10 +26,11 @@ class RasterizerOpenGL;
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
-class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
- HostCounter, std::vector<OGLQuery>> {
+class QueryCache final
+ : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
- explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
+ explicit QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::MemoryManager& gpu_memory);
~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type);
@@ -40,6 +41,7 @@ public:
private:
RasterizerOpenGL& gl_rasterizer;
+ std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> query_pools;
};
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
@@ -62,10 +64,12 @@ class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
public:
explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
u8* host_ptr);
- CachedQuery(CachedQuery&& rhs) noexcept;
- CachedQuery(const CachedQuery&) = delete;
+ ~CachedQuery() override;
+ CachedQuery(CachedQuery&& rhs) noexcept;
CachedQuery& operator=(CachedQuery&& rhs) noexcept;
+
+ CachedQuery(const CachedQuery&) = delete;
CachedQuery& operator=(const CachedQuery&) = delete;
void Flush() override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 55e79aaf6..bbb2eb17c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -30,6 +30,7 @@
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core/shader_cache.h"
namespace OpenGL {
@@ -60,15 +61,28 @@ constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
-constexpr std::size_t NumSupportedVertexAttributes = 16;
+constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
+constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
ShaderType shader_type, std::size_t index = 0) {
+ if constexpr (std::is_same_v<Entry, SamplerEntry>) {
+ if (entry.is_separated) {
+ const u32 buffer_1 = entry.buffer;
+ const u32 buffer_2 = entry.secondary_buffer;
+ const u32 offset_1 = entry.offset;
+ const u32 offset_2 = entry.secondary_offset;
+ const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
+ const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
+ return engine.GetTextureInfo(handle_1 | handle_2);
+ }
+ }
if (entry.is_bindless) {
- const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
- return engine.GetTextureInfo(tex_handle);
+ const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
+ return engine.GetTextureInfo(handle);
}
+
const auto& gpu_profile = engine.AccessGuestDriverProfile();
const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
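[Editor's note] For separated samplers, the texture (TIC) and sampler (TSC) halves of the handle come from two different const buffer words, and the hunk above merges them with a bitwise OR. A minimal sketch of that composition, assuming the usual Maxwell packing (TIC index in the low 20 bits, TSC index in the upper 12); the concrete values are illustrative:

    #include <cstdint>

    // Each word contributes only its own bit range, so OR recombines them
    // into a single 32-bit bindless texture handle.
    constexpr std::uint32_t tic_word = 0x0000'1234; // TIC index 0x1234
    constexpr std::uint32_t tsc_word = 0x0560'0000; // TSC index 0x56 << 20
    static_assert((tic_word | tsc_word) == 0x0560'1234);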
@@ -93,21 +107,65 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
return buffer.size;
}
+/// Translates hardware transform feedback indices
+/// @param location Hardware location
+/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
+/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
+std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
+ const u8 index = location / 4;
+ if (index >= 8 && index <= 39) {
+ return {GL_GENERIC_ATTRIB_NV, index - 8};
+ }
+ if (index >= 48 && index <= 55) {
+ return {GL_TEXTURE_COORD_NV, index - 48};
+ }
+ switch (index) {
+ case 7:
+ return {GL_POSITION, 0};
+ case 40:
+ return {GL_PRIMARY_COLOR_NV, 0};
+ case 41:
+ return {GL_SECONDARY_COLOR_NV, 0};
+ case 42:
+ return {GL_BACK_PRIMARY_COLOR_NV, 0};
+ case 43:
+ return {GL_BACK_SECONDARY_COLOR_NV, 0};
+ }
+ UNIMPLEMENTED_MSG("index={}", static_cast<int>(index));
+ return {GL_POSITION, 0};
+}
+
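[Editor's note] Worked examples of the mapping above; each hardware varying location is divided by 4 to obtain the attribute index before the token lookup:

    // location 28..31 -> index 7  -> {GL_POSITION, 0}
    // location 32..35 -> index 8  -> {GL_GENERIC_ATTRIB_NV, 0}
    // location 192    -> index 48 -> {GL_TEXTURE_COORD_NV, 0}
    const auto [token, tf_index] = TransformFeedbackEnum(34); // GL_GENERIC_ATTRIB_NV, 0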
void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
+void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) {
+ if (num_entries == 0) {
+ return;
+ }
+ if (num_entries % 2 == 1) {
+ pointers[num_entries] = 0;
+ }
+ const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2);
+ glProgramLocalParametersI4uivNV(target, 0, num_vectors,
+ reinterpret_cast<const GLuint*>(pointers));
+}
+
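[Editor's note] The vector arithmetic above follows from glProgramLocalParametersI4uivNV consuming 128-bit uvec4 parameters: each vector carries two 64-bit addresses, so an odd pointer count is zero-padded first. This is also why the callers assert entries.size() < pointers.size() strictly, leaving room for the pad slot:

    // 5 pointers -> 1 zero pad -> (5 + 1) / 2 == 3 uvec4 vectors uploaded
    static_assert((5 + 1) / 2 == 3);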
} // Anonymous namespace
-RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- const Device& device, ScreenInfo& info,
- ProgramManager& program_manager, StateTracker& state_tracker)
- : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device,
- state_tracker},
- shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
- buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
- fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
- screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
+RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_,
+ Core::Memory::Memory& cpu_memory, const Device& device_,
+ ScreenInfo& screen_info_, ProgramManager& program_manager_,
+ StateTracker& state_tracker_)
+ : RasterizerAccelerated{cpu_memory}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
+ kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
+ screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
+ texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker),
+ shader_cache(*this, emu_window, gpu, maxwell3d, kepler_compute, gpu_memory, device),
+ query_cache(*this, maxwell3d, gpu_memory),
+ buffer_cache(*this, gpu_memory, cpu_memory, device, STREAM_BUFFER_SIZE),
+ fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
+ async_shaders(emu_window) {
CheckExtensions();
unified_uniform_buffer.Create();
@@ -120,6 +178,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
nullptr, 0);
}
}
+
+ if (device.UseAsynchronousShaders()) {
+ async_shaders.AllocateWorkers();
+ }
}
RasterizerOpenGL::~RasterizerOpenGL() {
@@ -137,8 +199,7 @@ void RasterizerOpenGL::CheckExtensions() {
}
void RasterizerOpenGL::SetupVertexFormat() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexFormats]) {
return;
}
@@ -152,13 +213,13 @@ void RasterizerOpenGL::SetupVertexFormat() {
// avoid OpenGL errors.
// TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
// assume every shader uses them all.
- for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
+ for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
if (!flags[Dirty::VertexFormat0 + index]) {
continue;
}
flags[Dirty::VertexFormat0 + index] = false;
- const auto attrib = gpu.regs.vertex_attrib_format[index];
+ const auto attrib = maxwell3d.regs.vertex_attrib_format[index];
const auto gl_index = static_cast<GLuint>(index);
// Disable constant attributes.
@@ -171,9 +232,10 @@ void RasterizerOpenGL::SetupVertexFormat() {
if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt ||
attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) {
glVertexAttribIFormat(gl_index, attrib.ComponentCount(),
- MaxwellToGL::VertexType(attrib), attrib.offset);
+ MaxwellToGL::VertexFormat(attrib), attrib.offset);
} else {
- glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
+ glVertexAttribFormat(gl_index, attrib.ComponentCount(),
+ MaxwellToGL::VertexFormat(attrib),
attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
}
glVertexAttribBinding(gl_index, attrib.buffer);
@@ -181,8 +243,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
}
void RasterizerOpenGL::SetupVertexBuffer() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexBuffers]) {
return;
}
@@ -190,9 +251,11 @@ void RasterizerOpenGL::SetupVertexBuffer() {
MICROPROFILE_SCOPE(OpenGL_VB);
+ const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
+
// Upload all guest vertex arrays sequentially to our buffer
- const auto& regs = gpu.regs;
- for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ const auto& regs = maxwell3d.regs;
+ for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
if (!flags[Dirty::VertexBuffer0 + index]) {
continue;
}
@@ -205,29 +268,37 @@ void RasterizerOpenGL::SetupVertexBuffer() {
const GPUVAddr start = vertex_array.StartAddress();
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
-
ASSERT(end >= start);
+
+ const GLuint gl_index = static_cast<GLuint>(index);
const u64 size = end - start;
if (size == 0) {
- glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride);
+ glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
+ if (use_unified_memory) {
+ glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
+ }
continue;
}
- const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
- glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset,
- vertex_array.stride);
+ const auto info = buffer_cache.UploadMemory(start, size);
+ if (use_unified_memory) {
+ glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
+ glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
+ info.address + info.offset, size);
+ } else {
+ glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
+ }
}
}
void RasterizerOpenGL::SetupVertexInstances() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexInstances]) {
return;
}
flags[Dirty::VertexInstances] = false;
- const auto& regs = gpu.regs;
- for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
+ const auto& regs = maxwell3d.regs;
+ for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
if (!flags[Dirty::VertexInstance0 + index]) {
continue;
}
@@ -242,25 +313,23 @@ void RasterizerOpenGL::SetupVertexInstances() {
GLintptr RasterizerOpenGL::SetupIndexBuffer() {
MICROPROFILE_SCOPE(OpenGL_Index);
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
const std::size_t size = CalculateIndexBufferSize();
- const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
- glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer);
- return offset;
+ const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
+ return info.offset;
}
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader);
- auto& gpu = system.GPU().Maxwell3D();
- std::size_t num_ssbos = 0;
u32 clip_distances = 0;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- const auto& shader_config = gpu.regs.shader_config[index];
+ const auto& shader_config = maxwell3d.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
- if (!gpu.regs.IsShaderConfigEnabled(index)) {
+ if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
switch (program) {
case Maxwell::ShaderProgram::Geometry:
program_manager.UseGeometryShader(0);
@@ -275,31 +344,15 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
// Currently these stages are not supported in the OpenGL backend.
- // Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
- if (program == Maxwell::ShaderProgram::TesselationControl) {
- continue;
- } else if (program == Maxwell::ShaderProgram::TesselationEval) {
+ // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
+ if (program == Maxwell::ShaderProgram::TesselationControl ||
+ program == Maxwell::ShaderProgram::TesselationEval) {
continue;
}
- Shader shader{shader_cache.GetStageProgram(program)};
-
- if (device.UseAssemblyShaders()) {
- // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
- // all stages share the same bindings.
- const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size();
- ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage");
- num_ssbos += num_stage_ssbos;
- }
-
- // Stage indices are 0 - 5
- const std::size_t stage = index == 0 ? 0 : index - 1;
- SetupDrawConstBuffers(stage, shader);
- SetupDrawGlobalMemory(stage, shader);
- SetupDrawTextures(stage, shader);
- SetupDrawImages(stage, shader);
+ Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
- const GLuint program_handle = shader->GetHandle();
+ const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
switch (program) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
@@ -316,6 +369,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
shader_config.enable.Value(), shader_config.offset);
}
+ // Stage indices are 0 - 5
+ const std::size_t stage = index == 0 ? 0 : index - 1;
+ SetupDrawConstBuffers(stage, shader);
+ SetupDrawGlobalMemory(stage, shader);
+ SetupDrawTextures(stage, shader);
+ SetupDrawImages(stage, shader);
+
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -330,11 +390,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
SyncClipEnabled(clip_distances);
- gpu.dirty.flags[Dirty::Shaders] = false;
+ maxwell3d.dirty.flags[Dirty::Shaders] = false;
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
std::size_t size = 0;
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
@@ -352,34 +412,27 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
}
std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
-
- return static_cast<std::size_t>(regs.index_array.count) *
- static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
+ return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
+ static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
}
-void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
+void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
- shader_cache.LoadDiskCache(stop_loading, callback);
-}
-
-void RasterizerOpenGL::SetupDirtyFlags() {
- state_tracker.Initialize();
+ shader_cache.LoadDiskCache(title_id, stop_loading, callback);
}
void RasterizerOpenGL::ConfigureFramebuffers() {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
- auto& gpu = system.GPU().Maxwell3D();
- if (!gpu.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
+ if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
return;
}
- gpu.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
+ maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
texture_cache.GuardRenderTargets(true);
View depth_surface = texture_cache.GetDepthBufferSurface(true);
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
// Bind the framebuffer surfaces
@@ -411,8 +464,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
}
void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
- auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
texture_cache.GuardRenderTargets(true);
View color_surface;
@@ -462,12 +514,11 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_de
}
void RasterizerOpenGL::Clear() {
- const auto& gpu = system.GPU().Maxwell3D();
- if (!gpu.ShouldExecute()) {
+ if (!maxwell3d.ShouldExecute()) {
return;
}
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
bool use_color{};
bool use_depth{};
bool use_stencil{};
@@ -532,7 +583,6 @@ void RasterizerOpenGL::Clear() {
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
- auto& gpu = system.GPU().Maxwell3D();
query_cache.UpdateCounters();
@@ -576,7 +626,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
// Prepare the vertex array.
- buffer_cache.Map(buffer_size);
+ const bool invalidated = buffer_cache.Map(buffer_size);
+
+ if (invalidated) {
+ // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
+ auto& dirty = maxwell3d.dirty.flags;
+ dirty[Dirty::VertexBuffers] = true;
+ for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
+ dirty[index] = true;
+ }
+ }
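[Editor's note] The re-dirtying above is needed because a stream buffer invalidation reallocates the backing storage, so every binding recorded against the old buffer is stale; setting the aggregate flag re-enters SetupVertexBuffer and the per-index flags force each binding to be re-uploaded on this draw.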
// Prepare vertex array format.
SetupVertexFormat();
@@ -592,16 +651,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
// Setup emulation uniform buffer.
if (!device.UseAssemblyShaders()) {
MaxwellUniformData ubo;
- ubo.SetFromRegs(gpu);
- const auto [buffer, offset] =
+ ubo.SetFromRegs(maxwell3d);
+ const auto info =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
- glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
+ glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
static_cast<GLsizeiptr>(sizeof(ubo)));
}
// Setup shaders and their used resources.
texture_cache.GuardSamplers(true);
- const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology);
+ const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
SetupShaders(primitive_mode);
texture_cache.GuardSamplers(false);
@@ -618,14 +677,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
BeginTransformFeedback(primitive_mode);
- const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
+ const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
const GLsizei num_instances =
- static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
+ static_cast<GLsizei>(is_instanced ? maxwell3d.mme_draw.instance_count : 1);
if (is_indexed) {
- const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base);
- const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count);
+ const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
+ const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
- const GLenum format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format);
+ const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
glDrawElements(primitive_mode, num_vertices, format, offset);
} else if (num_instances == 1 && base_instance == 0) {
@@ -644,8 +703,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
base_instance);
}
} else {
- const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first);
- const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count);
+ const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vertex_buffer.first);
+ const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.vertex_buffer.count);
if (num_instances == 1 && base_instance == 0) {
glDrawArrays(primitive_mode, base_vertex, num_vertices);
} else if (base_instance == 0) {
@@ -660,7 +719,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
++num_queued_commands;
- system.GPU().TickWork();
+ gpu.TickWork();
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -668,6 +727,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
current_cbuf = 0;
auto kernel = shader_cache.GetComputeKernel(code_addr);
+ program_manager.BindCompute(kernel->GetHandle());
+
SetupComputeTextures(kernel);
SetupComputeImages(kernel);
@@ -681,7 +742,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
buffer_cache.Unmap();
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const auto& launch_desc = kepler_compute.launch_description;
program_manager.BindCompute(kernel->GetHandle());
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
@@ -744,17 +805,14 @@ void RasterizerOpenGL::SyncGuestHost() {
}
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
- auto& memory_manager{gpu.MemoryManager()};
- memory_manager.Write<u32>(addr, value);
+ gpu_memory.Write<u32>(addr, value);
return;
}
fence_manager.SignalSemaphore(addr, value);
}
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
gpu.IncrementSyncPoint(value);
return;
@@ -763,7 +821,6 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) {
}
void RasterizerOpenGL::ReleaseFences() {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
return;
}
@@ -842,14 +899,14 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return true;
}
-void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
static constexpr std::array PARAMETER_LUT = {
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
+ const auto& stages = maxwell3d.state.shader_stages;
const auto& shader_stage = stages[stage_index];
const auto& entries = shader->GetEntries();
const bool use_unified = entries.use_unified_uniforms;
@@ -872,9 +929,9 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
}
}
-void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const auto& launch_desc = kepler_compute.launch_description;
const auto& entries = kernel->GetEntries();
const bool use_unified = entries.use_unified_uniforms;
@@ -906,8 +963,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
if (device.UseAssemblyShaders()) {
glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
} else {
- glBindBufferRange(GL_UNIFORM_BUFFER, binding,
- buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
}
return;
}
@@ -920,68 +976,89 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
const GPUVAddr gpu_addr = buffer.address;
- auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
+ auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
if (device.UseAssemblyShaders()) {
UNIMPLEMENTED_IF(use_unified);
- if (offset != 0) {
+ if (info.offset != 0) {
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
- glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
- cbuf = staging_cbuf;
- offset = 0;
+ glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
+ info.handle = staging_cbuf;
+ info.offset = 0;
}
- glBindBufferRangeNV(stage, binding, cbuf, offset, size);
+ glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
return;
}
if (use_unified) {
- glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size);
+ glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
+ unified_offset, size);
} else {
- glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
}
}
-void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
- const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
+void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
+ static constexpr std::array TARGET_LUT = {
+ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
+ GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
+ };
+
+ const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
+ const auto& entries{shader->GetEntries().global_memory_entries};
- u32 binding =
- device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
- for (const auto& entry : shader->GetEntries().global_memory_entries) {
+ std::array<GLuint64EXT, 32> pointers;
+ ASSERT(entries.size() < pointers.size());
+
+ const bool assembly_shaders = device.UseAssemblyShaders();
+ u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
+ for (const auto& entry : entries) {
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
- const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
- const u32 size{memory_manager.Read<u32>(addr + 8)};
- SetupGlobalMemory(binding++, entry, gpu_addr, size);
+ const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
+ const u32 size{gpu_memory.Read<u32>(addr + 8)};
+ SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
+ ++binding;
+ }
+ if (assembly_shaders) {
+ UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size());
}
}
-void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
- const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
+void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
+ const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
+ const auto& entries{kernel->GetEntries().global_memory_entries};
+
+ std::array<GLuint64EXT, 32> pointers;
+ ASSERT(entries.size() < pointers.size());
u32 binding = 0;
- for (const auto& entry : kernel->GetEntries().global_memory_entries) {
- const auto addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
- const auto gpu_addr{memory_manager.Read<u64>(addr)};
- const auto size{memory_manager.Read<u32>(addr + 8)};
- SetupGlobalMemory(binding++, entry, gpu_addr, size);
+ for (const auto& entry : entries) {
+ const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
+ const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
+ const u32 size{gpu_memory.Read<u32>(addr + 8)};
+ SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
+ ++binding;
+ }
+ if (device.UseAssemblyShaders()) {
+ UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size());
}
}
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
- GPUVAddr gpu_addr, std::size_t size) {
- const auto alignment{device.GetShaderStorageBufferAlignment()};
- const auto [ssbo, buffer_offset] =
- buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset,
- static_cast<GLsizeiptr>(size));
+ GPUVAddr gpu_addr, std::size_t size,
+ GLuint64EXT* pointer) {
+ const std::size_t alignment{device.GetShaderStorageBufferAlignment()};
+ const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
+ if (device.UseAssemblyShaders()) {
+ *pointer = info.address + info.offset;
+ } else {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
+ static_cast<GLsizeiptr>(size));
+ }
}
-void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
MICROPROFILE_SCOPE(OpenGL_Texture);
- const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).sampler;
for (const auto& entry : shader->GetEntries().samplers) {
const auto shader_type = static_cast<ShaderType>(stage_index);
@@ -992,13 +1069,12 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
}
}
-void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
MICROPROFILE_SCOPE(OpenGL_Texture);
- const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
for (const auto& entry : kernel->GetEntries().samplers) {
for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i);
+ const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i);
SetupTexture(binding++, texture, entry);
}
}
@@ -1021,21 +1097,19 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
}
}
-void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
- const auto& maxwell3d = system.GPU().Maxwell3D();
+void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
u32 binding = device.GetBaseBindings(stage_index).image;
for (const auto& entry : shader->GetEntries().images) {
- const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
+ const auto shader_type = static_cast<ShaderType>(stage_index);
const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
SetupImage(binding++, tic, entry);
}
}
-void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
- const auto& compute = system.GPU().KeplerCompute();
+void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
u32 binding = 0;
for (const auto& entry : shader->GetEntries().images) {
- const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
+ const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic;
SetupImage(binding++, tic, entry);
}
}
@@ -1055,9 +1129,8 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t
}
void RasterizerOpenGL::SyncViewport() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
- const auto& regs = gpu.regs;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
const bool dirty_viewport = flags[Dirty::Viewports];
const bool dirty_clip_control = flags[Dirty::ClipControl];
@@ -1129,25 +1202,23 @@ void RasterizerOpenGL::SyncViewport() {
}
void RasterizerOpenGL::SyncDepthClamp() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::DepthClampEnabled]) {
return;
}
flags[Dirty::DepthClampEnabled] = false;
- oglEnable(GL_DEPTH_CLAMP, gpu.regs.view_volume_clip_control.depth_clamp_disabled == 0);
+ oglEnable(GL_DEPTH_CLAMP, maxwell3d.regs.view_volume_clip_control.depth_clamp_disabled == 0);
}
void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
return;
}
flags[Dirty::ClipDistances] = false;
- clip_mask &= gpu.regs.clip_distance_enabled;
+ clip_mask &= maxwell3d.regs.clip_distance_enabled;
if (clip_mask == last_clip_distance_mask) {
return;
}
@@ -1163,9 +1234,8 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
- const auto& regs = gpu.regs;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
if (flags[Dirty::CullTest]) {
flags[Dirty::CullTest] = false;
@@ -1180,26 +1250,24 @@ void RasterizerOpenGL::SyncCullMode() {
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PrimitiveRestart]) {
return;
}
flags[Dirty::PrimitiveRestart] = false;
- if (gpu.regs.primitive_restart.enabled) {
+ if (maxwell3d.regs.primitive_restart.enabled) {
glEnable(GL_PRIMITIVE_RESTART);
- glPrimitiveRestartIndex(gpu.regs.primitive_restart.index);
+ glPrimitiveRestartIndex(maxwell3d.regs.primitive_restart.index);
} else {
glDisable(GL_PRIMITIVE_RESTART);
}
}
void RasterizerOpenGL::SyncDepthTestState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
- const auto& regs = gpu.regs;
if (flags[Dirty::DepthMask]) {
flags[Dirty::DepthMask] = false;
glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE);
@@ -1217,14 +1285,13 @@ void RasterizerOpenGL::SyncDepthTestState() {
}
void RasterizerOpenGL::SyncStencilTestState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::StencilTest]) {
return;
}
flags[Dirty::StencilTest] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_STENCIL_TEST, regs.stencil_enable);
glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func),
@@ -1249,25 +1316,24 @@ void RasterizerOpenGL::SyncStencilTestState() {
}
void RasterizerOpenGL::SyncRasterizeEnable() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::RasterizeEnable]) {
return;
}
flags[Dirty::RasterizeEnable] = false;
- oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0);
+ oglEnable(GL_RASTERIZER_DISCARD, maxwell3d.regs.rasterize_enable == 0);
}
void RasterizerOpenGL::SyncPolygonModes() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PolygonModes]) {
return;
}
flags[Dirty::PolygonModes] = false;
- if (gpu.regs.fill_rectangle) {
+ const auto& regs = maxwell3d.regs;
+ if (regs.fill_rectangle) {
if (!GLAD_GL_NV_fill_rectangle) {
LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
@@ -1280,27 +1346,26 @@ void RasterizerOpenGL::SyncPolygonModes() {
return;
}
- if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) {
+ if (regs.polygon_mode_front == regs.polygon_mode_back) {
flags[Dirty::PolygonModeFront] = false;
flags[Dirty::PolygonModeBack] = false;
- glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
+ glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
return;
}
if (flags[Dirty::PolygonModeFront]) {
flags[Dirty::PolygonModeFront] = false;
- glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
+ glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
}
if (flags[Dirty::PolygonModeBack]) {
flags[Dirty::PolygonModeBack] = false;
- glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back));
+ glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_back));
}
}
void RasterizerOpenGL::SyncColorMask() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::ColorMasks]) {
return;
}
@@ -1309,7 +1374,7 @@ void RasterizerOpenGL::SyncColorMask() {
const bool force = flags[Dirty::ColorMaskCommon];
flags[Dirty::ColorMaskCommon] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
if (regs.color_mask_common) {
if (!force && !flags[Dirty::ColorMask0]) {
return;
@@ -1334,33 +1399,30 @@ void RasterizerOpenGL::SyncColorMask() {
}
void RasterizerOpenGL::SyncMultiSampleState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::MultisampleControl]) {
return;
}
flags[Dirty::MultisampleControl] = false;
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage);
oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one);
}
void RasterizerOpenGL::SyncFragmentColorClampState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::FragmentClampColor]) {
return;
}
flags[Dirty::FragmentClampColor] = false;
- glClampColor(GL_CLAMP_FRAGMENT_COLOR, gpu.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
+ glClampColor(GL_CLAMP_FRAGMENT_COLOR, maxwell3d.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
}
void RasterizerOpenGL::SyncBlendState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
- const auto& regs = gpu.regs;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
if (flags[Dirty::BlendColor]) {
flags[Dirty::BlendColor] = false;
@@ -1417,14 +1479,13 @@ void RasterizerOpenGL::SyncBlendState() {
}
void RasterizerOpenGL::SyncLogicOpState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::LogicOp]) {
return;
}
flags[Dirty::LogicOp] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
if (regs.logic_op.enable) {
glEnable(GL_COLOR_LOGIC_OP);
glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation));
@@ -1434,14 +1495,13 @@ void RasterizerOpenGL::SyncLogicOpState() {
}
void RasterizerOpenGL::SyncScissorTest() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::Scissors]) {
return;
}
flags[Dirty::Scissors] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
if (!flags[Dirty::Scissor0 + index]) {
continue;
@@ -1460,16 +1520,15 @@ void RasterizerOpenGL::SyncScissorTest() {
}
void RasterizerOpenGL::SyncPointState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PointSize]) {
return;
}
flags[Dirty::PointSize] = false;
- oglEnable(GL_POINT_SPRITE, gpu.regs.point_sprite_enable);
+ oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
- if (gpu.regs.vp_point_size.enable) {
+ if (maxwell3d.regs.vp_point_size.enable) {
// By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
glEnable(GL_PROGRAM_POINT_SIZE);
return;
@@ -1477,32 +1536,30 @@ void RasterizerOpenGL::SyncPointState() {
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
// in OpenGL).
- glPointSize(std::max(1.0f, gpu.regs.point_size));
+ glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
glDisable(GL_PROGRAM_POINT_SIZE);
}
void RasterizerOpenGL::SyncLineState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::LineWidth]) {
return;
}
flags[Dirty::LineWidth] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable);
glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased);
}
void RasterizerOpenGL::SyncPolygonOffset() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PolygonOffset]) {
return;
}
flags[Dirty::PolygonOffset] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable);
oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable);
oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable);
@@ -1516,14 +1573,13 @@ void RasterizerOpenGL::SyncPolygonOffset() {
}
void RasterizerOpenGL::SyncAlphaTest() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::AlphaTest]) {
return;
}
flags[Dirty::AlphaTest] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
if (regs.alpha_test_enabled && regs.rt_control.count > 1) {
LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested");
}
@@ -1537,22 +1593,79 @@ void RasterizerOpenGL::SyncAlphaTest() {
}
void RasterizerOpenGL::SyncFramebufferSRGB() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::FramebufferSRGB]) {
return;
}
flags[Dirty::FramebufferSRGB] = false;
- oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
+ oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
+}
+
+void RasterizerOpenGL::SyncTransformFeedback() {
+ // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will
+ // signal when the injection is actually needed.
+ const auto& regs = maxwell3d.regs;
+
+ static constexpr std::size_t STRIDE = 3;
+ std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
+ std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
+
+ GLint* cursor = attribs.data();
+ GLint* current_stream = streams.data();
+
+ for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
+ const auto& layout = regs.tfb_layouts[feedback];
+ UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
+ if (layout.varying_count == 0) {
+ continue;
+ }
+
+ *current_stream = static_cast<GLint>(feedback);
+ if (current_stream != streams.data()) {
+ // When advancing to the next stream, push the expected NEXT_BUFFER token
+ cursor[0] = GL_NEXT_BUFFER_NV;
+ cursor[1] = 0;
+ cursor[2] = 0;
+ cursor += STRIDE;
+ }
+ ++current_stream;
+
+ const auto& locations = regs.tfb_varying_locs[feedback];
+ std::optional<u8> current_index;
+ for (u32 offset = 0; offset < layout.varying_count; ++offset) {
+ const u8 location = locations[offset];
+ const u8 index = location / 4;
+
+ if (current_index == index) {
+ // Increase the component count of the previous attribute
+ ++cursor[-2];
+ continue;
+ }
+ current_index = index;
+
+ std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
+ cursor[1] = 1;
+ cursor += STRIDE;
+ }
+ }
+
+ const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
+ const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
+ glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
+ GL_INTERLEAVED_ATTRIBS);
}
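[Editor's note] The attribs array is consumed as {token, component count, index} triplets, with GL_NEXT_BUFFER_NV triplets separating buffers. An illustrative stream for two captured buffers, assuming buffer 0 records gl_Position and buffer 1 records two components of generic attribute 3:

    const GLint attribs[] = {
        GL_POSITION,          4, 0, // buffer 0: gl_Position, 4 components
        GL_NEXT_BUFFER_NV,    0, 0, // advance to the next buffer
        GL_GENERIC_ATTRIB_NV, 2, 3, // buffer 1: attribute 3, 2 components
    };
    const GLint streams[] = {0, 1};
    glTransformFeedbackStreamAttribsNV(3, attribs, 2, streams, GL_INTERLEAVED_ATTRIBS);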
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
+ if (device.UseAssemblyShaders()) {
+ SyncTransformFeedback();
+ }
+
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
@@ -1579,11 +1692,15 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
static_cast<GLsizeiptr>(size));
}
+ // We may have to call BeginTransformFeedbackNV here, since it seems to map to a different
+ // implementation in Nvidia's driver (the function pointer differs). But we are using
+ // ARB_transform_feedback3 features with NV_transform_feedback interactions, and the ARB
+ // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works.
glBeginTransformFeedback(GL_POINTS);
}
void RasterizerOpenGL::EndTransformFeedback() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
@@ -1600,8 +1717,9 @@ void RasterizerOpenGL::EndTransformFeedback() {
const GLuint handle = transform_feedback_buffers[index].handle;
const GPUVAddr gpu_addr = binding.Address();
const std::size_t size = binding.buffer_size;
- const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
- glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
+ const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
+ glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
+ static_cast<GLsizeiptr>(size));
}
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index f5dc56a0e..f451404b2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -19,7 +19,6 @@
#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_accelerated.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
@@ -34,10 +33,11 @@
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/utils.h"
+#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
-namespace Core {
-class System;
+namespace Core::Memory {
+class Memory;
}
namespace Core::Frontend {
@@ -55,9 +55,10 @@ struct DrawParameters;
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
- explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- const Device& device, ScreenInfo& info,
- ProgramManager& program_manager, StateTracker& state_tracker);
+ explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
+ Core::Memory::Memory& cpu_memory, const Device& device,
+ ScreenInfo& screen_info, ProgramManager& program_manager,
+ StateTracker& state_tracker);
~RasterizerOpenGL() override;
void Draw(bool is_indexed, bool is_instanced) override;
@@ -83,15 +84,22 @@ public:
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
- void LoadDiskResources(const std::atomic_bool& stop_loading,
+ void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
- void SetupDirtyFlags() override;
/// Returns true when there are commands queued to the OpenGL server.
bool AnyCommandQueued() const {
return num_queued_commands > 0;
}
+ VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
+ return async_shaders;
+ }
+
+ const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
+ return async_shaders;
+ }
+
private:
/// Configures the color and depth framebuffer states.
void ConfigureFramebuffers();
@@ -100,10 +108,10 @@ private:
void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
/// Configures the current constbuffers to use for the draw command.
- void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
+ void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
/// Configures the current constbuffers to use for the kernel invocation.
- void SetupComputeConstBuffers(const Shader& kernel);
+ void SetupComputeConstBuffers(Shader* kernel);
/// Configures a constant buffer.
void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
@@ -111,30 +119,30 @@ private:
std::size_t unified_offset);
/// Configures the current global memory entries to use for the draw command.
- void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
+ void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
/// Configures the current global memory entries to use for the kernel invocation.
- void SetupComputeGlobalMemory(const Shader& kernel);
+ void SetupComputeGlobalMemory(Shader* kernel);
- /// Configures a constant buffer.
+ /// Configures a global memory buffer.
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
- std::size_t size);
+ std::size_t size, GLuint64EXT* pointer);
/// Configures the current textures to use for the draw command.
- void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
+ void SetupDrawTextures(std::size_t stage_index, Shader* shader);
/// Configures the textures used in a compute shader.
- void SetupComputeTextures(const Shader& kernel);
+ void SetupComputeTextures(Shader* kernel);
/// Configures a texture.
void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
const SamplerEntry& entry);
/// Configures images in a graphics shader.
- void SetupDrawImages(std::size_t stage_index, const Shader& shader);
+ void SetupDrawImages(std::size_t stage_index, Shader* shader);
/// Configures images in a compute shader.
- void SetupComputeImages(const Shader& shader);
+ void SetupComputeImages(Shader* shader);
/// Configures an image.
void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
@@ -202,6 +210,10 @@ private:
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
+ /// Syncs transform feedback state to match guest state
+ /// @note Only valid on assembly shaders
+ void SyncTransformFeedback();
+
/// Begin a transform feedback
void BeginTransformFeedback(GLenum primitive_mode);
@@ -225,7 +237,15 @@ private:
void SetupShaders(GLenum primitive_mode);
+ Tegra::GPU& gpu;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
+
const Device& device;
+ ScreenInfo& screen_info;
+ ProgramManager& program_manager;
+ StateTracker& state_tracker;
TextureCacheOpenGL texture_cache;
ShaderCacheOpenGL shader_cache;
@@ -235,10 +255,7 @@ private:
OGLBufferCache buffer_cache;
FenceManagerOpenGL fence_manager;
- Core::System& system;
- ScreenInfo& screen_info;
- ProgramManager& program_manager;
- StateTracker& state_tracker;
+ VideoCommon::Shader::AsyncShaders async_shaders;
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index a787e27d2..0ebcec427 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <string_view>
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
@@ -82,11 +83,13 @@ void OGLSampler::Release() {
handle = 0;
}
-void OGLShader::Create(const char* source, GLenum type) {
- if (handle != 0)
+void OGLShader::Create(std::string_view source, GLenum type) {
+ if (handle != 0) {
return;
- if (source == nullptr)
+ }
+ if (source.empty()) {
return;
+ }
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
handle = GLShader::LoadShader(source, type);
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index f8b322227..f48398669 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -4,6 +4,7 @@
#pragma once
+#include <string_view>
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
@@ -127,7 +128,7 @@ public:
return *this;
}
- void Create(const char* source, GLenum type);
+ void Create(std::string_view source, GLenum type);
void Release();
@@ -177,6 +178,12 @@ public:
Release();
}
+ OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept {
+ Release();
+ handle = std::exchange(o.handle, 0);
+ return *this;
+ }
+
/// Deletes the internal OpenGL resource
void Release();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index a991ca64a..bd56bed0c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -20,7 +20,9 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_arb_decompiler.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
@@ -29,6 +31,8 @@
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
+#include "video_core/shader_notify.h"
namespace OpenGL {
@@ -123,7 +127,7 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
entry.graphics_info, entry.compute_info};
- const auto registry = std::make_shared<Registry>(entry.type, info);
+ auto registry = std::make_shared<Registry>(entry.type, info);
for (const auto& [address, value] : entry.keys) {
const auto [buffer, offset] = address;
registry->InsertKey(buffer, offset, value);
@@ -138,16 +142,32 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
return registry;
}
+std::unordered_set<GLenum> GetSupportedFormats() {
+ GLint num_formats;
+ glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
+
+ std::vector<GLint> formats(num_formats);
+ glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
+
+ std::unordered_set<GLenum> supported_formats;
+ for (const GLint format : formats) {
+ supported_formats.insert(static_cast<GLenum>(format));
+ }
+ return supported_formats;
+}
+
+} // Anonymous namespace
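[Editor's note] GetSupportedFormats is typically consulted before handing a cached blob to glProgramBinary, since a driver update can drop the format a binary was saved with. A sketch under that assumption (cached_format and cached_binary are illustrative names, not the disk cache's actual fields):

    const std::unordered_set<GLenum> supported = GetSupportedFormats();
    if (supported.count(cached_format) != 0) {
        // Only load binaries whose format the current driver still reports
        glProgramBinary(program.handle, cached_format, cached_binary.data(),
                        static_cast<GLsizei>(cached_binary.size()));
    }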
+
ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
- const ShaderIR& ir, const Registry& registry,
- bool hint_retrievable = false) {
+ const ShaderIR& ir, const Registry& registry, bool hint_retrievable) {
const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
LOG_INFO(Render_OpenGL, "{}", shader_id);
auto program = std::make_shared<ProgramHandle>();
if (device.UseAssemblyShaders()) {
- const std::string arb = "Not implemented";
+ const std::string arb =
+ DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
GLuint& arb_prog = program->assembly_program.handle;
@@ -178,79 +198,98 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
return program;
}
-std::unordered_set<GLenum> GetSupportedFormats() {
- GLint num_formats;
- glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
-
- std::vector<GLint> formats(num_formats);
- glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
-
- std::unordered_set<GLenum> supported_formats;
- for (const GLint format : formats) {
- supported_formats.insert(static_cast<GLenum>(format));
- }
- return supported_formats;
-}
-
-} // Anonymous namespace
-
-CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
- std::shared_ptr<VideoCommon::Shader::Registry> registry,
- ShaderEntries entries, ProgramSharedPtr program_)
- : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
- size_in_bytes{size_in_bytes}, program{std::move(program_)} {
- // Assign either the assembly program or source program. We can't have both.
+Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
+ ProgramSharedPtr program_, bool is_built)
+ : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
+ is_built(is_built) {
handle = program->assembly_program.handle;
if (handle == 0) {
handle = program->source_program.handle;
}
- ASSERT(handle != 0);
+ if (is_built) {
+ ASSERT(handle != 0);
+ }
}
-CachedShader::~CachedShader() = default;
+Shader::~Shader() = default;
-GLuint CachedShader::GetHandle() const {
+GLuint Shader::GetHandle() const {
DEBUG_ASSERT(registry->IsConsistent());
return handle;
}
-Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
- Maxwell::ShaderProgram program_type, ProgramCode code,
- ProgramCode code_b) {
+bool Shader::IsBuilt() const {
+ return is_built;
+}
+
+void Shader::AsyncOpenGLBuilt(OGLProgram new_program) {
+ program->source_program = std::move(new_program);
+ handle = program->source_program.handle;
+ is_built = true;
+}
+
+void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) {
+ program->assembly_program = std::move(new_program);
+ handle = program->assembly_program.handle;
+ is_built = true;
+}
+
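[Editor's note] These two callbacks are the completion side of the asynchronous build: a worker links the program off-thread, and when the result is delivered the callback republishes the raw GL name and flips is_built, so SetupShaders stops binding a null handle. Illustrative hand-off only; BuildOnWorkerThread is hypothetical and the real plumbing lives in VideoCommon::Shader::AsyncShaders:

    OGLProgram program = BuildOnWorkerThread(code); // hypothetical off-thread link
    shader->AsyncOpenGLBuilt(std::move(program));   // publish the handle, mark built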
+std::unique_ptr<Shader> Shader::CreateStageFromMemory(
+ const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
+ ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
const auto shader_type = GetShaderType(program_type);
- const std::size_t size_in_bytes = code.size() * sizeof(u64);
- auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D());
- const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
- // TODO(Rodrigo): Handle VertexA shaders
- // std::optional<ShaderIR> ir_b;
- // if (!code_b.empty()) {
- // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
- // }
- auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
+ auto& gpu = params.gpu;
+ gpu.ShaderNotify().MarkSharderBuilding();
+
+ auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
+ if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
+ const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
+ // TODO(Rodrigo): Handle VertexA shaders
+ // std::optional<ShaderIR> ir_b;
+ // if (!code_b.empty()) {
+ // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
+ // }
+ auto program =
+ BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
+ ShaderDiskCacheEntry entry;
+ entry.type = shader_type;
+ entry.code = std::move(code);
+ entry.code_b = std::move(code_b);
+ entry.unique_identifier = params.unique_identifier;
+ entry.bound_buffer = registry->GetBoundBuffer();
+ entry.graphics_info = registry->GetGraphicsInfo();
+ entry.keys = registry->GetKeys();
+ entry.bound_samplers = registry->GetBoundSamplers();
+ entry.bindless_samplers = registry->GetBindlessSamplers();
+ params.disk_cache.SaveEntry(std::move(entry));
+
+ gpu.ShaderNotify().MarkShaderComplete();
+
+ return std::unique_ptr<Shader>(new Shader(std::move(registry),
+ MakeEntries(params.device, ir, shader_type),
+ std::move(program), true));
+ } else {
+ // Build the IR now; it is still needed to generate the shader entries
+ const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
+ auto entries = MakeEntries(params.device, ir, shader_type);
- ShaderDiskCacheEntry entry;
- entry.type = shader_type;
- entry.code = std::move(code);
- entry.code_b = std::move(code_b);
- entry.unique_identifier = params.unique_identifier;
- entry.bound_buffer = registry->GetBoundBuffer();
- entry.graphics_info = registry->GetGraphicsInfo();
- entry.keys = registry->GetKeys();
- entry.bound_samplers = registry->GetBoundSamplers();
- entry.bindless_samplers = registry->GetBindlessSamplers();
- params.disk_cache.SaveEntry(std::move(entry));
+ async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
+ std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
+ COMPILER_SETTINGS, *registry, cpu_addr);
- return std::shared_ptr<CachedShader>(
- new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
- MakeEntries(params.device, ir, shader_type), std::move(program)));
+ auto program = std::make_shared<ProgramHandle>();
+ return std::unique_ptr<Shader>(
+ new Shader(std::move(registry), std::move(entries), std::move(program), false));
+ }
}
-Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
- const std::size_t size_in_bytes = code.size() * sizeof(u64);
+std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
+ ProgramCode code) {
+ auto& gpu = params.gpu;
+ gpu.ShaderNotify().MarkSharderBuilding();
- auto& engine = params.system.GPU().KeplerCompute();
- auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
+ auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
const u64 uid = params.unique_identifier;
auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
@@ -266,26 +305,33 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
- return std::shared_ptr<CachedShader>(
- new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
- MakeEntries(params.device, ir, ShaderType::Compute), std::move(program)));
+ gpu.ShaderNotify().MarkShaderComplete();
+
+ return std::unique_ptr<Shader>(new Shader(std::move(registry),
+ MakeEntries(params.device, ir, ShaderType::Compute),
+ std::move(program)));
}
-Shader CachedShader::CreateFromCache(const ShaderParameters& params,
- const PrecompiledShader& precompiled_shader,
- std::size_t size_in_bytes) {
- return std::shared_ptr<CachedShader>(
- new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
- precompiled_shader.entries, precompiled_shader.program));
+std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
+ const PrecompiledShader& precompiled_shader) {
+ return std::unique_ptr<Shader>(new Shader(
+ precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
}
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
- Core::Frontend::EmuWindow& emu_window, const Device& device)
- : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device},
- disk_cache{system} {}
+ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer,
+ Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const Device& device_)
+ : VideoCommon::ShaderCache<Shader>{rasterizer}, emu_window{emu_window_}, gpu{gpu_},
+ gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_},
+ kepler_compute{kepler_compute_}, device{device_} {}
+
+ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
-void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
+void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
+ disk_cache.BindTitleID(title_id);
const std::optional transferable = disk_cache.LoadTransferable();
if (!transferable) {
return;
@@ -361,7 +407,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
};
- const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
+ const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
const std::size_t bucket_size{transferable->size() / num_workers};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
std::vector<std::thread> threads(num_workers);
@@ -436,87 +482,122 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
return program;
}
-Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
- if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
- return last_shaders[static_cast<std::size_t>(program)];
+Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
+ VideoCommon::Shader::AsyncShaders& async_shaders) {
+ if (!maxwell3d.dirty.flags[Dirty::Shaders]) {
+ auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
+ if (last_shader->IsBuilt()) {
+ return last_shader;
+ }
}
- auto& memory_manager{system.GPU().MemoryManager()};
- const GPUVAddr address{GetShaderAddress(system, program)};
+ const GPUVAddr address{GetShaderAddress(maxwell3d, program)};
+
+ if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
+ auto completed_work = async_shaders.GetCompletedWork();
+ for (auto& work : completed_work) {
+ Shader* shader = TryGet(work.cpu_address);
+ gpu.ShaderNotify().MarkShaderComplete();
+ if (shader == nullptr) {
+ continue;
+ }
+ using namespace VideoCommon::Shader;
+ if (work.backend == AsyncShaders::Backend::OpenGL) {
+ shader->AsyncOpenGLBuilt(std::move(work.program.opengl));
+ } else if (work.backend == AsyncShaders::Backend::GLASM) {
+ shader->AsyncGLASMBuilt(std::move(work.program.glasm));
+ }
+
+ auto& registry = shader->GetRegistry();
+
+ ShaderDiskCacheEntry entry;
+ entry.type = work.shader_type;
+ entry.code = std::move(work.code);
+ entry.code_b = std::move(work.code_b);
+ entry.unique_identifier = work.uid;
+ entry.bound_buffer = registry.GetBoundBuffer();
+ entry.graphics_info = registry.GetGraphicsInfo();
+ entry.keys = registry.GetKeys();
+ entry.bound_samplers = registry.GetBoundSamplers();
+ entry.bindless_samplers = registry.GetBindlessSamplers();
+ disk_cache.SaveEntry(std::move(entry));
+ }
+ }
// Look up shader in the cache based on address
- const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
- Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
- if (shader) {
+ const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)};
+ if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
return last_shaders[static_cast<std::size_t>(program)] = shader;
}
- const auto host_ptr{memory_manager.GetPointer(address)};
+ const u8* const host_ptr{gpu_memory.GetPointer(address)};
// No shader found - create a new one
- ProgramCode code{GetShaderCode(memory_manager, address, host_ptr, false)};
+ ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)};
ProgramCode code_b;
if (program == Maxwell::ShaderProgram::VertexA) {
- const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
- const u8* host_ptr_b = memory_manager.GetPointer(address_b);
- code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false);
+ const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)};
+ const u8* host_ptr_b = gpu_memory.GetPointer(address_b);
+ code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false);
}
+ const std::size_t code_size = code.size() * sizeof(u64);
- const auto unique_identifier = GetUniqueIdentifier(
+ const u64 unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
- const ShaderParameters params{system, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
+ const ShaderParameters params{gpu, maxwell3d, disk_cache, device,
+ *cpu_addr, host_ptr, unique_identifier};
+ std::unique_ptr<Shader> shader;
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {
- shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
- std::move(code_b));
+ shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b),
+ async_shaders, cpu_addr.value_or(0));
} else {
- const std::size_t size_in_bytes = code.size() * sizeof(u64);
- shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
+ shader = Shader::CreateFromCache(params, found->second);
}
+ Shader* const result = shader.get();
if (cpu_addr) {
- Register(shader);
+ Register(std::move(shader), *cpu_addr, code_size);
} else {
- null_shader = shader;
+ null_shader = std::move(shader);
}
- return last_shaders[static_cast<std::size_t>(program)] = shader;
+ return last_shaders[static_cast<std::size_t>(program)] = result;
}
-Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
- auto& memory_manager{system.GPU().MemoryManager()};
- const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
+Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
+ const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)};
- auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
- if (kernel) {
+ if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
return kernel;
}
- const auto host_ptr{memory_manager.GetPointer(code_addr)};
// No kernel found, create a new one
- auto code{GetShaderCode(memory_manager, code_addr, host_ptr, true)};
- const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
+ const u8* host_ptr{gpu_memory.GetPointer(code_addr)};
+ ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)};
+ const std::size_t code_size{code.size() * sizeof(u64)};
+ const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
- const ShaderParameters params{system, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
+ const ShaderParameters params{gpu, kepler_compute, disk_cache, device,
+ *cpu_addr, host_ptr, unique_identifier};
+ std::unique_ptr<Shader> kernel;
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {
- kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
+ kernel = Shader::CreateKernelFromMemory(params, std::move(code));
} else {
- const std::size_t size_in_bytes = code.size() * sizeof(u64);
- kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
+ kernel = Shader::CreateFromCache(params, found->second);
}
+ Shader* const result = kernel.get();
if (cpu_addr) {
- Register(kernel);
+ Register(std::move(kernel), *cpu_addr, code_size);
} else {
- null_kernel = kernel;
+ null_kernel = std::move(kernel);
}
- return kernel;
+ return result;
}
} // namespace OpenGL
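
A caller-side sketch of the asynchronous hand-off implemented above. The setup of async_shaders (construction and worker allocation) happens elsewhere in the renderer and is assumed here; the cache calls are the ones introduced in this diff:

    // Sketch only: shader_cache and async_shaders are assumed to be owned by
    // the rasterizer; GetStageProgram/IsBuilt are the calls from this diff.
    Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
    if (!shader->IsBuilt()) {
        // The GL handle is still 0: skip binding this stage for now. A later
        // GetStageProgram call drains async_shaders.GetCompletedWork() and
        // installs the finished handle via AsyncOpenGLBuilt()/AsyncGLASMBuilt().
    }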
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index b2ae8d7f9..1708af06a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -18,29 +18,30 @@
#include "common/common_types.h"
#include "video_core/engines/shader_type.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
-namespace Core {
-class System;
+namespace Tegra {
+class MemoryManager;
}
namespace Core::Frontend {
class EmuWindow;
}
+namespace VideoCommon::Shader {
+class AsyncShaders;
+}
+
namespace OpenGL {
-class CachedShader;
class Device;
class RasterizerOpenGL;
-struct UnspecializedShader;
-using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct ProgramHandle {
@@ -56,86 +57,104 @@ struct PrecompiledShader {
};
struct ShaderParameters {
- Core::System& system;
+ Tegra::GPU& gpu;
+ Tegra::Engines::ConstBufferEngineInterface& engine;
ShaderDiskCacheOpenGL& disk_cache;
const Device& device;
VAddr cpu_addr;
- u8* host_ptr;
+ const u8* host_ptr;
u64 unique_identifier;
};
-class CachedShader final : public RasterizerCacheObject {
+ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
+ u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
+ const VideoCommon::Shader::Registry& registry,
+ bool hint_retrievable = false);
+
+class Shader final {
public:
- ~CachedShader();
+ ~Shader();
/// Gets the GL program handle for the shader
GLuint GetHandle() const;
- /// Returns the size in bytes of the shader
- std::size_t GetSizeInBytes() const override {
- return size_in_bytes;
- }
+ bool IsBuilt() const;
/// Gets the shader entries for the shader
const ShaderEntries& GetEntries() const {
return entries;
}
- static Shader CreateStageFromMemory(const ShaderParameters& params,
- Maxwell::ShaderProgram program_type,
- ProgramCode program_code, ProgramCode program_code_b);
- static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
+ const VideoCommon::Shader::Registry& GetRegistry() const {
+ return *registry;
+ }
+
+ /// Marks an OpenGL shader as built
+ void AsyncOpenGLBuilt(OGLProgram new_program);
- static Shader CreateFromCache(const ShaderParameters& params,
- const PrecompiledShader& precompiled_shader,
- std::size_t size_in_bytes);
+ /// Marks a GLASM shader as built
+ void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
+
+ static std::unique_ptr<Shader> CreateStageFromMemory(
+ const ShaderParameters& params, Maxwell::ShaderProgram program_type,
+ ProgramCode program_code, ProgramCode program_code_b,
+ VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
+
+ static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
+ ProgramCode code);
+
+ static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
+ const PrecompiledShader& precompiled_shader);
private:
- explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
- std::shared_ptr<VideoCommon::Shader::Registry> registry,
- ShaderEntries entries, ProgramSharedPtr program);
+ explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
+ ProgramSharedPtr program, bool is_built = true);
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
- std::size_t size_in_bytes = 0;
ProgramSharedPtr program;
GLuint handle = 0;
+ bool is_built{};
};
-class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
+class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
public:
- explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
- Core::Frontend::EmuWindow& emu_window, const Device& device);
+ explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::Frontend::EmuWindow& emu_window,
+ Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::Engines::KeplerCompute& kepler_compute,
+ Tegra::MemoryManager& gpu_memory, const Device& device);
+ ~ShaderCacheOpenGL() override;
/// Loads disk cache for the current game
- void LoadDiskCache(const std::atomic_bool& stop_loading,
+ void LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
/// Gets the current specified shader stage program
- Shader GetStageProgram(Maxwell::ShaderProgram program);
+ Shader* GetStageProgram(Maxwell::ShaderProgram program,
+ VideoCommon::Shader::AsyncShaders& async_shaders);
/// Gets a compute kernel at the given address
- Shader GetComputeKernel(GPUVAddr code_addr);
-
-protected:
- // We do not have to flush this cache as things in it are never modified by us.
- void FlushObjectInner(const Shader& object) override {}
+ Shader* GetComputeKernel(GPUVAddr code_addr);
private:
ProgramSharedPtr GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats);
- Core::System& system;
Core::Frontend::EmuWindow& emu_window;
+ Tegra::GPU& gpu;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
const Device& device;
+
ShaderDiskCacheOpenGL disk_cache;
std::unordered_map<u64, PrecompiledShader> runtime_cache;
- Shader null_shader{};
- Shader null_kernel{};
+ std::unique_ptr<Shader> null_shader;
+ std::unique_ptr<Shader> null_kernel;
- std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+ std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
};
} // namespace OpenGL
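
The header above drops the shared_ptr-based CachedShader handle in favor of cache-owned std::unique_ptr<Shader> storage with borrowed raw pointers. A minimal, self-contained sketch of that ownership pattern, with all names illustrative rather than taken from the codebase:

    #include <cstdint>
    #include <memory>
    #include <unordered_map>

    struct Entry {};

    class Cache {
    public:
        // The cache takes ownership; the caller keeps a non-owning pointer.
        Entry* Register(std::unique_ptr<Entry> entry, std::uint64_t addr) {
            Entry* const result = entry.get();
            map[addr] = std::move(entry);
            return result;
        }

        // Returns nullptr on miss instead of an empty shared_ptr.
        Entry* TryGet(std::uint64_t addr) const {
            const auto it = map.find(addr);
            return it != map.end() ? it->second.get() : nullptr;
        }

    private:
        std::unordered_map<std::uint64_t, std::unique_ptr<Entry>> map;
    };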
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index d6e30b321..bbb8fb095 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -37,6 +37,7 @@ using Tegra::Shader::IpaMode;
using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
+using Tegra::Shader::TextureType;
using VideoCommon::Shader::BuildTransformFeedback;
using VideoCommon::Shader::Registry;
@@ -526,6 +527,9 @@ private:
if (device.HasImageLoadFormatted()) {
code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
}
+ if (device.HasTextureShadowLod()) {
+ code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
+ }
if (device.HasWarpIntrinsics()) {
code.AddLine("#extension GL_NV_gpu_shader5 : require");
code.AddLine("#extension GL_NV_shader_thread_group : require");
@@ -598,8 +602,15 @@ private:
return;
}
const auto& info = registry.GetComputeInfo();
- if (const u32 size = info.shared_memory_size_in_words; size > 0) {
- code.AddLine("shared uint smem[{}];", size);
+ if (u32 size = info.shared_memory_size_in_words * 4; size > 0) {
+ const u32 limit = device.GetMaxComputeSharedMemorySize();
+ if (size > limit) {
+ LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
+ size, limit);
+ size = limit;
+ }
+
+ code.AddLine("shared uint smem[{}];", size / 4);
code.AddNewLine();
}
code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
@@ -802,7 +813,7 @@ private:
const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
const auto it = transform_feedback.find(location);
if (it == transform_feedback.end()) {
- return {};
+ return std::nullopt;
}
return it->second.components;
}
@@ -909,13 +920,13 @@ private:
return "samplerBuffer";
}
switch (sampler.type) {
- case Tegra::Shader::TextureType::Texture1D:
+ case TextureType::Texture1D:
return "sampler1D";
- case Tegra::Shader::TextureType::Texture2D:
+ case TextureType::Texture2D:
return "sampler2D";
- case Tegra::Shader::TextureType::Texture3D:
+ case TextureType::Texture3D:
return "sampler3D";
- case Tegra::Shader::TextureType::TextureCube:
+ case TextureType::TextureCube:
return "samplerCube";
default:
UNREACHABLE();
@@ -1284,21 +1295,21 @@ private:
switch (element) {
case 0:
UNIMPLEMENTED();
- return {};
+ return std::nullopt;
case 1:
if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
- return {};
+ return std::nullopt;
}
return {{"gl_Layer", Type::Int}};
case 2:
if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
- return {};
+ return std::nullopt;
}
return {{"gl_ViewportIndex", Type::Int}};
case 3:
return {{"gl_PointSize", Type::Float}};
}
- return {};
+ return std::nullopt;
case Attribute::Index::FrontColor:
return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
case Attribute::Index::FrontSecondaryColor:
@@ -1321,7 +1332,7 @@ private:
Type::Float}};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
- return {};
+ return std::nullopt;
}
}
@@ -1380,8 +1391,19 @@ private:
const std::size_t count = operation.GetOperandsCount();
const bool has_array = meta->sampler.is_array;
const bool has_shadow = meta->sampler.is_shadow;
+ const bool workaround_lod_array_shadow_as_grad =
+ !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
+ ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
+ meta->sampler.type == TextureType::TextureCube);
+
+ std::string expr = "texture";
+
+ if (workaround_lod_array_shadow_as_grad) {
+ expr += "Grad";
+ } else {
+ expr += function_suffix;
+ }
- std::string expr = "texture" + function_suffix;
if (!meta->aoffi.empty()) {
expr += "Offset";
} else if (!meta->ptp.empty()) {
@@ -1415,6 +1437,18 @@ private:
expr += ')';
}
+ if (workaround_lod_array_shadow_as_grad) {
+ switch (meta->sampler.type) {
+ case TextureType::Texture2D:
+ return expr + ", vec2(0.0), vec2(0.0))";
+ case TextureType::TextureCube:
+ return expr + ", vec3(0.0), vec3(0.0))";
+ default:
+ UNREACHABLE();
+ break;
+ }
+ }
+
for (const auto& variant : extras) {
if (const auto argument = std::get_if<TextureArgument>(&variant)) {
expr += GenerateTextureArgument(*argument);
@@ -1887,7 +1921,7 @@ private:
Expression Comparison(Operation operation) {
static_assert(!unordered || type == Type::Float);
- const Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
+ Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
// GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's
@@ -1927,10 +1961,6 @@ private:
return {fmt::format("({} != 0)", carry), Type::Bool};
}
- Expression LogicalFIsNan(Operation operation) {
- return GenerateUnary(operation, "isnan", Type::Bool, Type::Float);
- }
-
Expression LogicalAssign(Operation operation) {
const Node& dest = operation[0];
const Node& src = operation[1];
@@ -2041,8 +2071,19 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
- std::string expr = GenerateTexture(
- operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
+ std::string expr{};
+
+ if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
+ ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
+ meta->sampler.type == TextureType::TextureCube)) {
+ LOG_ERROR(Render_OpenGL,
+ "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
+ expr = GenerateTexture(operation, "Lod", {});
+ } else {
+ expr = GenerateTexture(operation, "Lod",
+ {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
+ }
+
if (meta->sampler.is_shadow) {
expr = "vec4(" + expr + ')';
}
@@ -2735,15 +2776,6 @@ private:
return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
}
- bool IsRenderTargetEnabled(u32 render_target) const {
- for (u32 component = 0; component < 4; ++component) {
- if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
- return true;
- }
- }
- return false;
- }
-
const Device& device;
const ShaderIR& ir;
const Registry& registry;
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 9e95a122b..166ee34e1 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap;
namespace {
+using VideoCommon::Shader::SeparateSamplerKey;
+
using ShaderCacheVersionHash = std::array<u8, 64>;
struct ConstBufferKey {
@@ -37,18 +39,26 @@ struct ConstBufferKey {
u32 value = 0;
};
-struct BoundSamplerKey {
+struct BoundSamplerEntry {
u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
-struct BindlessSamplerKey {
+struct SeparateSamplerEntry {
+ u32 cbuf1 = 0;
+ u32 cbuf2 = 0;
+ u32 offset1 = 0;
+ u32 offset2 = 0;
+ Tegra::Engines::SamplerDescriptor sampler;
+};
+
+struct BindlessSamplerEntry {
u32 cbuf = 0;
u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
-constexpr u32 NativeVersion = 20;
+constexpr u32 NativeVersion = 21;
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
@@ -63,7 +73,7 @@ ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
-bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
+bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
return false;
}
@@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
u32 texture_handler_size_value;
u32 num_keys;
u32 num_bound_samplers;
+ u32 num_separate_samplers;
u32 num_bindless_samplers;
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
file.ReadArray(&texture_handler_size_value, 1) != 1 ||
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
+ file.ReadArray(&num_separate_samplers, 1) != 1 ||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
return false;
}
@@ -101,29 +113,38 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
}
std::vector<ConstBufferKey> flat_keys(num_keys);
- std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
- std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
+ std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
+ std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
+ std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
flat_bound_samplers.size() ||
+ file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
+ flat_separate_samplers.size() ||
file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
flat_bindless_samplers.size()) {
return false;
}
- for (const auto& key : flat_keys) {
- keys.insert({{key.cbuf, key.offset}, key.value});
+ for (const auto& entry : flat_keys) {
+ keys.insert({{entry.cbuf, entry.offset}, entry.value});
+ }
+ for (const auto& entry : flat_bound_samplers) {
+ bound_samplers.emplace(entry.offset, entry.sampler);
}
- for (const auto& key : flat_bound_samplers) {
- bound_samplers.emplace(key.offset, key.sampler);
+ for (const auto& entry : flat_separate_samplers) {
+ SeparateSamplerKey key;
+ key.buffers = {entry.cbuf1, entry.cbuf2};
+ key.offsets = {entry.offset1, entry.offset2};
+ separate_samplers.emplace(key, entry.sampler);
}
- for (const auto& key : flat_bindless_samplers) {
- bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
+ for (const auto& entry : flat_bindless_samplers) {
+ bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
}
return true;
}
-bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
+bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
if (file.WriteObject(static_cast<u32>(type)) != 1 ||
file.WriteObject(static_cast<u32>(code.size())) != 1 ||
file.WriteObject(static_cast<u32>(code_b.size())) != 1) {
@@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
+ file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
return false;
}
@@ -152,48 +174,64 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
}
- std::vector<BoundSamplerKey> flat_bound_samplers;
+ std::vector<BoundSamplerEntry> flat_bound_samplers;
flat_bound_samplers.reserve(bound_samplers.size());
for (const auto& [address, sampler] : bound_samplers) {
- flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
+ flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
}
- std::vector<BindlessSamplerKey> flat_bindless_samplers;
+ std::vector<SeparateSamplerEntry> flat_separate_samplers;
+ flat_separate_samplers.reserve(separate_samplers.size());
+ for (const auto& [key, sampler] : separate_samplers) {
+ SeparateSamplerEntry entry;
+ std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
+ std::tie(entry.offset1, entry.offset2) = key.offsets;
+ entry.sampler = sampler;
+ flat_separate_samplers.push_back(entry);
+ }
+
+ std::vector<BindlessSamplerEntry> flat_bindless_samplers;
flat_bindless_samplers.reserve(bindless_samplers.size());
for (const auto& [address, sampler] : bindless_samplers) {
flat_bindless_samplers.push_back(
- BindlessSamplerKey{address.first, address.second, sampler});
+ BindlessSamplerEntry{address.first, address.second, sampler});
}
return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
flat_bound_samplers.size() &&
+ file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
+ flat_separate_samplers.size() &&
file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
flat_bindless_samplers.size();
}
-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
+void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
+ title_id = title_id_;
+}
+
std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
// Skip games without title id
- const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
- if (!Settings::values.use_disk_shader_cache || !has_title_id) {
- return {};
+ const bool has_title_id = title_id != 0;
+ if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
+ return std::nullopt;
}
- FileUtil::IOFile file(GetTransferablePath(), "rb");
+ Common::FS::IOFile file(GetTransferablePath(), "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No transferable shader cache found");
is_usable = true;
- return {};
+ return std::nullopt;
}
u32 version{};
if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
- return {};
+ return std::nullopt;
}
if (version < NativeVersion) {
@@ -201,12 +239,12 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran
file.Close();
InvalidateTransferable();
is_usable = true;
- return {};
+ return std::nullopt;
}
if (version > NativeVersion) {
LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
"of the emulator, skipping");
- return {};
+ return std::nullopt;
}
// Version is valid, load the shaders
@@ -215,7 +253,7 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran
ShaderDiskCacheEntry& entry = entries.emplace_back();
if (!entry.Load(file)) {
LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
- return {};
+ return std::nullopt;
}
}
@@ -228,7 +266,7 @@ std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled()
return {};
}
- FileUtil::IOFile file(GetPrecompiledPath(), "rb");
+ Common::FS::IOFile file(GetPrecompiledPath(), "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
return {};
@@ -245,7 +283,7 @@ std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled()
}
std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
- FileUtil::IOFile& file) {
+ Common::FS::IOFile& file) {
// Read compressed file from disk and decompress to virtual precompiled cache file
std::vector<u8> compressed(file.GetSize());
file.ReadBytes(compressed.data(), compressed.size());
@@ -256,12 +294,12 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo
ShaderCacheVersionHash file_hash{};
if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
precompiled_cache_virtual_file_offset = 0;
- return {};
+ return std::nullopt;
}
if (GetShaderCacheVersionHash() != file_hash) {
LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
precompiled_cache_virtual_file_offset = 0;
- return {};
+ return std::nullopt;
}
std::vector<ShaderDiskCachePrecompiled> entries;
@@ -271,19 +309,20 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo
if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
!LoadObjectFromPrecompiled(entry.binary_format) ||
!LoadObjectFromPrecompiled(binary_size)) {
- return {};
+ return std::nullopt;
}
entry.binary.resize(binary_size);
if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
- return {};
+ return std::nullopt;
}
}
- return entries;
+
+ return std::move(entries);
}
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
- if (!FileUtil::Delete(GetTransferablePath())) {
+ if (!Common::FS::Delete(GetTransferablePath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
GetTransferablePath());
}
@@ -294,7 +333,7 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
// Clear the virtual precompiled cache file
precompiled_cache_virtual_file.Resize(0);
- if (!FileUtil::Delete(GetPrecompiledPath())) {
+ if (!Common::FS::Delete(GetPrecompiledPath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
}
}
@@ -310,7 +349,7 @@ void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
return;
}
- FileUtil::IOFile file = AppendTransferableFile();
+ Common::FS::IOFile file = AppendTransferableFile();
if (!file.IsOpen()) {
return;
}
@@ -352,15 +391,15 @@ void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint progra
}
}
-FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
+Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
if (!EnsureDirectories()) {
return {};
}
const auto transferable_path{GetTransferablePath()};
- const bool existed = FileUtil::Exists(transferable_path);
+ const bool existed = Common::FS::Exists(transferable_path);
- FileUtil::IOFile file(transferable_path, "ab");
+ Common::FS::IOFile file(transferable_path, "ab");
if (!file.IsOpen()) {
LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", transferable_path);
return {};
@@ -392,7 +431,7 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
const auto precompiled_path{GetPrecompiledPath()};
- FileUtil::IOFile file(precompiled_path, "wb");
+ Common::FS::IOFile file(precompiled_path, "wb");
if (!file.IsOpen()) {
LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
@@ -406,24 +445,24 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
const auto CreateDir = [](const std::string& dir) {
- if (!FileUtil::CreateDir(dir)) {
+ if (!Common::FS::CreateDir(dir)) {
LOG_ERROR(Render_OpenGL, "Failed to create directory={}", dir);
return false;
}
return true;
};
- return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) &&
+ return CreateDir(Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)) &&
CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
CreateDir(GetPrecompiledDir());
}
std::string ShaderDiskCacheOpenGL::GetTransferablePath() const {
- return FileUtil::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
+ return Common::FS::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
}
std::string ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
- return FileUtil::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
+ return Common::FS::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
}
std::string ShaderDiskCacheOpenGL::GetTransferableDir() const {
@@ -435,11 +474,11 @@ std::string ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
}
std::string ShaderDiskCacheOpenGL::GetBaseDir() const {
- return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl";
+ return Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir) + DIR_SEP "opengl";
}
std::string ShaderDiskCacheOpenGL::GetTitleID() const {
- return fmt::format("{:016X}", system.CurrentProcess()->GetTitleID());
+ return fmt::format("{:016X}", title_id);
}
} // namespace OpenGL
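
Version 21 of the transferable format inserts the separate-sampler count and array between the bound-sampler and bindless-sampler blocks, which is why NativeVersion is bumped above. A sketch of the per-entry write order, reconstructed from Load/Save; fields elided here are the ones not shown in this diff:

    // Per-entry layout of a version-21 transferable file, in write order:
    //   u32 type; u32 code_size; u32 code_b_size;
    //   ... program code and header fields elided in this diff ...
    //   u32 num_keys; u32 num_bound_samplers;
    //   u32 num_separate_samplers;                              // new in v21
    //   u32 num_bindless_samplers;
    //   ConstBufferKey keys[num_keys];
    //   BoundSamplerEntry bound_samplers[num_bound_samplers];
    //   SeparateSamplerEntry separate[num_separate_samplers];   // new in v21
    //   BindlessSamplerEntry bindless[num_bindless_samplers];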
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index d5be52e40..aef841c1d 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -21,11 +21,7 @@
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
-namespace Core {
-class System;
-}
-
-namespace FileUtil {
+namespace Common::FS {
class IOFile;
}
@@ -38,9 +34,9 @@ struct ShaderDiskCacheEntry {
ShaderDiskCacheEntry();
~ShaderDiskCacheEntry();
- bool Load(FileUtil::IOFile& file);
+ bool Load(Common::FS::IOFile& file);
- bool Save(FileUtil::IOFile& file) const;
+ bool Save(Common::FS::IOFile& file) const;
bool HasProgramA() const {
return !code.empty() && !code_b.empty();
@@ -57,6 +53,7 @@ struct ShaderDiskCacheEntry {
VideoCommon::Shader::ComputeInfo compute_info;
VideoCommon::Shader::KeyMap keys;
VideoCommon::Shader::BoundSamplerMap bound_samplers;
+ VideoCommon::Shader::SeparateSamplerMap separate_samplers;
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
};
@@ -69,9 +66,12 @@ struct ShaderDiskCachePrecompiled {
class ShaderDiskCacheOpenGL {
public:
- explicit ShaderDiskCacheOpenGL(Core::System& system);
+ explicit ShaderDiskCacheOpenGL();
~ShaderDiskCacheOpenGL();
+ /// Binds a title ID for all future operations.
+ void BindTitleID(u64 title_id);
+
/// Loads the transferable cache. If the file has an old version or loading fails, it deletes the file.
std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
@@ -96,10 +96,10 @@ public:
private:
/// Loads the precompiled cache file. Returns empty on failure.
std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
- FileUtil::IOFile& file);
+ Common::FS::IOFile& file);
/// Opens the current game's transferable file and writes its header if it doesn't exist
- FileUtil::IOFile AppendTransferableFile() const;
+ Common::FS::IOFile AppendTransferableFile() const;
/// Saves the precompiled header to the virtual precompiled cache
void SavePrecompiledHeaderToVirtualPrecompiledCache();
@@ -156,8 +156,6 @@ private:
return LoadArrayFromPrecompiled(&object, 1);
}
- Core::System& system;
-
// Stores the whole precompiled cache, which will be read from or saved to the precompiled
// cache file
FileSys::VectorVfsFile precompiled_cache_virtual_file;
@@ -167,8 +165,11 @@ private:
// Stored transferable shaders
std::unordered_set<u64> stored_transferable;
+ /// Title ID to operate on
+ u64 title_id = 0;
+
// The cache has been loaded at boot
- bool is_usable{};
+ bool is_usable = false;
};
} // namespace OpenGL
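
Because LoadTransferable now bails out while no title ID is bound, BindTitleID must be called first, as LoadDiskCache does above. A minimal usage sketch with an illustrative title ID:

    ShaderDiskCacheOpenGL disk_cache;
    disk_cache.BindTitleID(0x0100000000010000ULL); // illustrative value
    if (const auto entries = disk_cache.LoadTransferable()) {
        // entries->size() transferable shaders can now be precompiled.
    }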
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 8e754fa90..691c6c79b 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -11,8 +11,30 @@
namespace OpenGL {
-ProgramManager::ProgramManager(const Device& device) {
- use_assembly_programs = device.UseAssemblyShaders();
+namespace {
+
+void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
+ if (current == old) {
+ return;
+ }
+ if (current == 0) {
+ if (enabled) {
+ enabled = false;
+ glDisable(stage);
+ }
+ return;
+ }
+ if (!enabled) {
+ enabled = true;
+ glEnable(stage);
+ }
+ glBindProgramARB(stage, current);
+}
+
+} // Anonymous namespace
+
+ProgramManager::ProgramManager(const Device& device)
+ : use_assembly_programs{device.UseAssemblyShaders()} {
if (use_assembly_programs) {
glEnable(GL_COMPUTE_PROGRAM_NV);
} else {
@@ -33,9 +55,7 @@ void ProgramManager::BindCompute(GLuint program) {
}
void ProgramManager::BindGraphicsPipeline() {
- if (use_assembly_programs) {
- UpdateAssemblyPrograms();
- } else {
+ if (!use_assembly_programs) {
UpdateSourcePrograms();
}
}
@@ -63,32 +83,25 @@ void ProgramManager::RestoreGuestPipeline() {
}
}
-void ProgramManager::UpdateAssemblyPrograms() {
- const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) {
- if (current == old) {
- return;
- }
- if (current == 0) {
- if (enabled) {
- enabled = false;
- glDisable(stage);
- }
- return;
- }
- if (!enabled) {
- enabled = true;
- glEnable(stage);
- }
- glBindProgramARB(stage, current);
- };
+void ProgramManager::UseVertexShader(GLuint program) {
+ if (use_assembly_programs) {
+ BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
+ }
+ current_state.vertex = program;
+}
- update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex);
- update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry,
- old_state.geometry);
- update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment,
- old_state.fragment);
+void ProgramManager::UseGeometryShader(GLuint program) {
+ if (use_assembly_programs) {
+ BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.geometry, geometry_enabled);
+ }
+ current_state.geometry = program;
+}
- old_state = current_state;
+void ProgramManager::UseFragmentShader(GLuint program) {
+ if (use_assembly_programs) {
+ BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.fragment, fragment_enabled);
+ }
+ current_state.fragment = program;
}
void ProgramManager::UpdateSourcePrograms() {
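
With the old loop folded into BindProgram, each Use*Shader call now binds its NV assembly program immediately and toggles glEnable/glDisable only when the zero/nonzero state changes, so BindGraphicsPipeline has nothing left to do in assembly mode. A short usage sketch; the handles are illustrative:

    // vertex_handle/fragment_handle come from previously built assembly programs.
    program_manager.UseVertexShader(vertex_handle);
    program_manager.UseGeometryShader(0); // disables GL_GEOMETRY_PROGRAM_NV
    program_manager.UseFragmentShader(fragment_handle);
    program_manager.BindGraphicsPipeline(); // no-op for assembly programs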
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 0f03b4f12..950e0dfcb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,17 +45,9 @@ public:
/// Rewinds BindHostPipeline state changes.
void RestoreGuestPipeline();
- void UseVertexShader(GLuint program) {
- current_state.vertex = program;
- }
-
- void UseGeometryShader(GLuint program) {
- current_state.geometry = program;
- }
-
- void UseFragmentShader(GLuint program) {
- current_state.fragment = program;
- }
+ void UseVertexShader(GLuint program);
+ void UseGeometryShader(GLuint program);
+ void UseFragmentShader(GLuint program);
private:
struct PipelineState {
@@ -64,9 +56,6 @@ private:
GLuint fragment = 0;
};
- /// Update NV_gpu_program5 programs.
- void UpdateAssemblyPrograms();
-
/// Update GLSL programs.
void UpdateSourcePrograms();
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 9e74eda0d..4bf0d6090 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <string_view>
#include <vector>
#include <glad/glad.h>
#include "common/assert.h"
@@ -11,7 +12,8 @@
namespace OpenGL::GLShader {
namespace {
-const char* GetStageDebugName(GLenum type) {
+
+std::string_view StageDebugName(GLenum type) {
switch (type) {
case GL_VERTEX_SHADER:
return "vertex";
@@ -25,12 +27,17 @@ const char* GetStageDebugName(GLenum type) {
UNIMPLEMENTED();
return "unknown";
}
+
} // Anonymous namespace
-GLuint LoadShader(const char* source, GLenum type) {
- const char* debug_type = GetStageDebugName(type);
+GLuint LoadShader(std::string_view source, GLenum type) {
+ const std::string_view debug_type = StageDebugName(type);
const GLuint shader_id = glCreateShader(type);
- glShaderSource(shader_id, 1, &source, nullptr);
+
+ const GLchar* source_string = source.data();
+ const GLint source_length = static_cast<GLint>(source.size());
+
+ glShaderSource(shader_id, 1, &source_string, &source_length);
LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
glCompileShader(shader_id);
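
Passing the length explicitly is what lets LoadShader accept a std::string_view that may not be null-terminated. A self-contained sketch of the same call pattern, assuming a current GL context with functions loaded through glad as above:

    #include <string_view>

    #include <glad/glad.h>

    GLuint CompileFromView(std::string_view source, GLenum type) {
        const GLuint shader = glCreateShader(type);
        const GLchar* const data = source.data();
        const GLint length = static_cast<GLint>(source.size());
        // With an explicit length, GL never scans for a terminating NUL, so a
        // substring of a larger buffer is safe to pass directly.
        glShaderSource(shader, 1, &data, &length);
        glCompileShader(shader);
        return shader;
    }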
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 03b7548c2..1b770532e 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -38,7 +38,7 @@ void LogShaderSource(T... shaders) {
* @param source String of the GLSL shader program
* @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
*/
-GLuint LoadShader(const char* source, GLenum type);
+GLuint LoadShader(std::string_view source, GLenum type);
/**
* Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index d24fad3de..6bcf831f2 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -214,10 +214,8 @@ void SetupDirtyMisc(Tables& tables) {
} // Anonymous namespace
-StateTracker::StateTracker(Core::System& system) : system{system} {}
-
-void StateTracker::Initialize() {
- auto& dirty = system.GPU().Maxwell3D().dirty;
+StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
+ auto& dirty = gpu.Maxwell3D().dirty;
auto& tables = dirty.tables;
SetupDirtyRenderTargets(tables);
SetupDirtyColorMasks(tables);
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 0f823288e..9d127548f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -13,8 +13,8 @@
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
-namespace Core {
-class System;
+namespace Tegra {
+class GPU;
}
namespace OpenGL {
@@ -90,9 +90,7 @@ static_assert(Last <= std::numeric_limits<u8>::max());
class StateTracker {
public:
- explicit StateTracker(Core::System& system);
-
- void Initialize();
+ explicit StateTracker(Tegra::GPU& gpu);
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
@@ -103,7 +101,6 @@ public:
}
void NotifyScreenDrawVertexArray() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::VertexFormats] = true;
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
@@ -117,98 +114,81 @@ public:
}
void NotifyPolygonModes() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::PolygonModes] = true;
flags[OpenGL::Dirty::PolygonModeFront] = true;
flags[OpenGL::Dirty::PolygonModeBack] = true;
}
void NotifyViewport0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::Viewports] = true;
flags[OpenGL::Dirty::Viewport0] = true;
}
void NotifyScissor0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::Scissors] = true;
flags[OpenGL::Dirty::Scissor0] = true;
}
void NotifyColorMask0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::ColorMasks] = true;
flags[OpenGL::Dirty::ColorMask0] = true;
}
void NotifyBlend0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::BlendStates] = true;
flags[OpenGL::Dirty::BlendState0] = true;
}
void NotifyFramebuffer() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[VideoCommon::Dirty::RenderTargets] = true;
}
void NotifyFrontFace() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::FrontFace] = true;
}
void NotifyCullTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::CullTest] = true;
}
void NotifyDepthMask() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::DepthMask] = true;
}
void NotifyDepthTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::DepthTest] = true;
}
void NotifyStencilTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::StencilTest] = true;
}
void NotifyPolygonOffset() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::PolygonOffset] = true;
}
void NotifyRasterizeEnable() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::RasterizeEnable] = true;
}
void NotifyFramebufferSRGB() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::FramebufferSRGB] = true;
}
void NotifyLogicOp() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::LogicOp] = true;
}
void NotifyClipControl() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::ClipControl] = true;
}
void NotifyAlphaTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::AlphaTest] = true;
}
private:
- Core::System& system;
+ Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
GLuint index_buffer = 0;
};
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 6ec328c53..887995cf4 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -2,11 +2,13 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <deque>
+#include <tuple>
#include <vector>
+
#include "common/alignment.h"
#include "common/assert.h"
#include "common/microprofile.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
@@ -14,8 +16,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
namespace OpenGL {
-OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent,
- bool use_persistent)
+OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
: buffer_size(size) {
gl_buffer.Create();
@@ -29,34 +30,22 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
allocate_size *= 2;
}
- if (use_persistent) {
- persistent = true;
- coherent = prefer_coherent;
- const GLbitfield flags =
- GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
- glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
- mapped_ptr = static_cast<u8*>(glMapNamedBufferRange(
- gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
- } else {
- glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW);
+ static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
+ glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
+ mapped_ptr = static_cast<u8*>(
+ glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
+
+ if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
+ glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
+ glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
}
}
OGLStreamBuffer::~OGLStreamBuffer() {
- if (persistent) {
- glUnmapNamedBuffer(gl_buffer.handle);
- }
+ glUnmapNamedBuffer(gl_buffer.handle);
gl_buffer.Release();
}
-GLuint OGLStreamBuffer::GetHandle() const {
- return gl_buffer.handle;
-}
-
-GLsizeiptr OGLStreamBuffer::GetSize() const {
- return buffer_size;
-}
-
std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
ASSERT(size <= buffer_size);
ASSERT(alignment <= buffer_size);
@@ -68,36 +57,21 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
bool invalidate = false;
if (buffer_pos + size > buffer_size) {
+ MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
+ glInvalidateBufferData(gl_buffer.handle);
+
buffer_pos = 0;
invalidate = true;
-
- if (persistent) {
- glUnmapNamedBuffer(gl_buffer.handle);
- }
}
- if (invalidate || !persistent) {
- MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
- GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
- (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
- (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
- mapped_ptr = static_cast<u8*>(
- glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags));
- mapped_offset = buffer_pos;
- }
-
- return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
+ return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
}
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
ASSERT(size <= mapped_size);
- if (!coherent && size > 0) {
- glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size);
- }
-
- if (!persistent) {
- glUnmapNamedBuffer(gl_buffer.handle);
+ if (size > 0) {
+ glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
}
buffer_pos += size;
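
The rewrite above commits to a single scheme: a persistent, explicitly flushed mapping with glInvalidateBufferData orphaning on wrap-around. A reduced standalone sketch of that scheme, assuming an OpenGL 4.5 context; the alignment handling of the real code is omitted:

    #include <cstdint>

    #include <glad/glad.h>

    constexpr GLsizeiptr kStreamSize = 1 << 20;
    GLuint stream_buffer = 0;
    std::uint8_t* mapped_ptr = nullptr;
    GLintptr stream_pos = 0;

    void Create() {
        constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
        glCreateBuffers(1, &stream_buffer);
        glNamedBufferStorage(stream_buffer, kStreamSize, nullptr, flags);
        mapped_ptr = static_cast<std::uint8_t*>(glMapNamedBufferRange(
            stream_buffer, 0, kStreamSize, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
    }

    std::uint8_t* Map(GLsizeiptr size) {
        if (stream_pos + size > kStreamSize) {
            // Orphan the storage; in-flight draws keep their old contents.
            glInvalidateBufferData(stream_buffer);
            stream_pos = 0;
        }
        return mapped_ptr + stream_pos;
    }

    void Unmap(GLsizeiptr written) {
        if (written > 0) {
            // Make the CPU writes visible to the GL before use.
            glFlushMappedNamedBufferRange(stream_buffer, stream_pos, written);
        }
        stream_pos += written;
    }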
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index f8383cbd4..307a67113 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -11,15 +11,13 @@
namespace OpenGL {
+class Device;
+
class OGLStreamBuffer : private NonCopyable {
public:
- explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false,
- bool use_persistent = true);
+ explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
~OGLStreamBuffer();
- GLuint GetHandle() const;
- GLsizeiptr GetSize() const;
-
/*
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
* and the optional alignment requirement.
@@ -32,15 +30,24 @@ public:
void Unmap(GLsizeiptr size);
+ GLuint Handle() const {
+ return gl_buffer.handle;
+ }
+
+ u64 Address() const {
+ return gpu_address;
+ }
+
+ GLsizeiptr Size() const noexcept {
+ return buffer_size;
+ }
+
private:
OGLBuffer gl_buffer;
- bool coherent = false;
- bool persistent = false;
-
+ GLuint64EXT gpu_address = 0;
GLintptr buffer_pos = 0;
GLsizeiptr buffer_size = 0;
- GLintptr mapped_offset = 0;
GLsizeiptr mapped_size = 0;
u8* mapped_ptr = nullptr;
};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 57db5a08b..a863ef218 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -41,91 +41,103 @@ struct FormatTuple {
};
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // ABGR8U
- {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // ABGR8S
- {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // ABGR8UI
- {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5U
- {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10U
- {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5U
- {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8U
- {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8UI
- {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F
- {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // RGBA16U
- {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // RGBA16S
- {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // RGBA16UI
- {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // R11FG11FB10F
- {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // RGBA32UI
- {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // DXT1
- {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // DXT23
- {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // DXT45
- {GL_COMPRESSED_RED_RGTC1}, // DXN1
- {GL_COMPRESSED_RG_RGTC2}, // DXN2UNORM
- {GL_COMPRESSED_SIGNED_RG_RGTC2}, // DXN2SNORM
- {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7U
- {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UF16
- {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SF16
- {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4
- {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8
- {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F
- {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F
- {GL_R32F, GL_RED, GL_FLOAT}, // R32F
- {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F
- {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16U
- {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16S
- {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16UI
- {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I
- {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // RG16
- {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F
- {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // RG16UI
- {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // RG16I
- {GL_RG16_SNORM, GL_RG, GL_SHORT}, // RG16S
- {GL_RGB32F, GL_RGB, GL_FLOAT}, // RGB32F
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // RGBA8_SRGB
- {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8U
- {GL_RG8_SNORM, GL_RG, GL_BYTE}, // RG8S
- {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG8UI
- {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG32UI
- {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // RGBX16F
- {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32UI
- {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I
- {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8
- {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5
- {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4
- {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
+ {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
+ {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
+ {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
+ {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
+ {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
+ {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
+ {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
+ {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
+ {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
+ {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
+ {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
+ {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
+ {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
+ {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
+ {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
+ {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
+ {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
+ {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
+ {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
+ {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
+ {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
+ {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
+ {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
+ {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
+ {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
+ {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
+ {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
+ {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
+ {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
+ {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
+ {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
+ {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
+ {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
+ {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
+ {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
+ {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
+ {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
+ {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
+ {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
+ {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
+ {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
+ {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
+ {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
+ {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
+ {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
+ {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
+ {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
+ {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
+ {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
+ {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
+ {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
+ {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
+ {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
+ {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
+ {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
+ {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
+ {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
+ {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
// Compressed sRGB formats
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // DXT1_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // DXT23_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // DXT45_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7U_SRGB
- {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // R4G4B4A4U
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
+ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
- {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5
+ {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8
+ {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6
+ {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10
+ {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
- {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12
+ {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
- {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6
+ {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5
+ {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
- {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9F
+ {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
// Depth formats
- {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // Z32F
- {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // Z16
+ {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
+ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
// DepthStencil formats
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // Z24S8
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8Z24
- {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // Z32FS8
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
+ {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
+ GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
}};
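
Each row pairs a sized internal format with, for uncompressed formats, the pixel-transfer format and type; compressed rows carry only the internal format. A usage sketch, assuming FormatTuple exposes internal_format/format/type members (the cache's real allocation path carries more state):

    // Sketch: how one tuple drives allocation and upload of a 2D level.
    void UploadLevel0(GLuint texture, const FormatTuple& tuple, GLsizei width,
                      GLsizei height, const void* pixels) {
        glTextureStorage2D(texture, 1, tuple.internal_format, width, height);
        glTextureSubImage2D(texture, 0, 0, 0, width, height, tuple.format,
                            tuple.type, pixels);
    }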
const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
@@ -178,10 +190,10 @@ GLint GetSwizzleSource(SwizzleSource source) {
GLenum GetComponent(PixelFormat format, bool is_first) {
switch (format) {
- case PixelFormat::Z24S8:
- case PixelFormat::Z32FS8:
+ case PixelFormat::D24_UNORM_S8_UINT:
+ case PixelFormat::D32_FLOAT_S8_UINT:
return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
- case PixelFormat::S8Z24:
+ case PixelFormat::S8_UINT_D24_UNORM:
return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
default:
UNREACHABLE();
@@ -263,9 +275,14 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param
target = GetTextureTarget(params.target);
texture = CreateTexture(params, target, internal_format, texture_buffer);
DecorateSurfaceName();
- main_view = CreateViewInner(
- ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels),
- true);
+
+ u32 num_layers = 1;
+ if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
+ num_layers = params.depth;
+ }
+
+ main_view =
+ CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
}
CachedSurface::~CachedSurface() = default;
@@ -386,7 +403,7 @@ void CachedSurface::DecorateSurfaceName() {
LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName());
}
-void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) {
+void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) {
LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix);
}
@@ -413,20 +430,23 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
CachedSurfaceView::~CachedSurfaceView() = default;
-void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
+void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const {
ASSERT(params.num_levels == 1);
+ if (params.target == SurfaceTarget::Texture3D) {
+ if (params.num_layers > 1) {
+ ASSERT(params.base_layer == 0);
+ glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level);
+ } else {
+ glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle,
+ params.base_level, params.base_layer);
+ }
+ return;
+ }
+
if (params.num_layers > 1) {
- // Layered framebuffer attachments
UNIMPLEMENTED_IF(params.base_layer != 0);
-
- switch (params.target) {
- case SurfaceTarget::Texture2DArray:
- glFramebufferTexture(target, attachment, GetTexture(), 0);
- break;
- default:
- UNIMPLEMENTED();
- }
+ glFramebufferTexture(fb_target, attachment, GetTexture(), 0);
return;
}
@@ -434,16 +454,16 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
const GLuint texture = surface.GetTexture();
switch (surface.GetSurfaceParams().target) {
case SurfaceTarget::Texture1D:
- glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
+ glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture2D:
- glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level);
+ glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubeArray:
- glFramebufferTextureLayer(target, attachment, texture, params.base_level,
+ glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level,
params.base_layer);
break;
default:
@@ -474,9 +494,9 @@ GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_sou
std::array swizzle{x_source, y_source, z_source, w_source};
switch (const PixelFormat format = GetSurfaceParams().pixel_format) {
- case PixelFormat::Z24S8:
- case PixelFormat::Z32FS8:
- case PixelFormat::S8Z24:
+ case PixelFormat::D24_UNORM_S8_UINT:
+ case PixelFormat::D32_FLOAT_S8_UINT:
+ case PixelFormat::S8_UINT_D24_UNORM:
UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
GetComponent(format, x_source == SwizzleSource::R));
@@ -500,17 +520,24 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
OGLTextureView texture_view;
texture_view.Create();
- glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level,
- params.num_levels, params.base_layer, params.num_layers);
+ if (target == GL_TEXTURE_3D) {
+ glTextureView(texture_view.handle, target, surface.texture.handle, format,
+ params.base_level, params.num_levels, 0, 1);
+ } else {
+ glTextureView(texture_view.handle, target, surface.texture.handle, format,
+ params.base_level, params.num_levels, params.base_layer, params.num_layers);
+ }
ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
return texture_view;
}
-TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
- VideoCore::RasterizerInterface& rasterizer,
- const Device& device, StateTracker& state_tracker)
- : TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} {
+TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::MemoryManager& gpu_memory, const Device& device,
+ StateTracker& state_tracker_)
+ : TextureCacheBase{rasterizer, maxwell3d, gpu_memory, device.HasASTC()}, state_tracker{
+ state_tracker_} {
src_framebuffer.Create();
dst_framebuffer.Create();
}
@@ -544,8 +571,8 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
const Tegra::Engines::Fermi2D::Config& copy_config) {
const auto& src_params{src_view->GetSurfaceParams()};
const auto& dst_params{dst_view->GetSurfaceParams()};
- UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
- UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
+ UNIMPLEMENTED_IF(src_params.depth != 1);
+ UNIMPLEMENTED_IF(dst_params.depth != 1);
state_tracker.NotifyScissor0();
state_tracker.NotifyFramebuffer();
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 8a2ac8603..7787134fc 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -80,15 +80,17 @@ public:
explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
~CachedSurfaceView();
- /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
- void Attach(GLenum attachment, GLenum target) const;
+ /// @brief Attaches this texture view to the framebuffer currently bound to fb_target
+ /// @param attachment Attachment point to bind the texture to
+ /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
+ void Attach(GLenum attachment, GLenum fb_target) const;
GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
Tegra::Texture::SwizzleSource y_source,
Tegra::Texture::SwizzleSource z_source,
Tegra::Texture::SwizzleSource w_source);
- void DecorateViewName(GPUVAddr gpu_addr, std::string prefix);
+ void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix);
void MarkAsModified(u64 tick) {
surface.MarkAsModified(true, tick);
@@ -127,8 +129,10 @@ private:
class TextureCacheOpenGL final : public TextureCacheBase {
public:
- explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const Device& device, StateTracker& state_tracker);
+ explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::MemoryManager& gpu_memory, const Device& device,
+ StateTracker& state_tracker);
~TextureCacheOpenGL();
protected:
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 994ae98eb..a8be2aa37 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -24,10 +24,11 @@ namespace MaxwellToGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
+inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
switch (attrib.type) {
- case Maxwell::VertexAttribute::Type::UnsignedInt:
case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ case Maxwell::VertexAttribute::Type::UnsignedScaled:
+ case Maxwell::VertexAttribute::Type::UnsignedInt:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -47,11 +48,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_UNSIGNED_INT_2_10_10_10_REV;
default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- return {};
+ break;
}
- case Maxwell::VertexAttribute::Type::SignedInt:
+ break;
case Maxwell::VertexAttribute::Type::SignedNorm:
+ case Maxwell::VertexAttribute::Type::SignedScaled:
+ case Maxwell::VertexAttribute::Type::SignedInt:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -71,9 +73,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_INT_2_10_10_10_REV;
default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- return {};
+ break;
}
+ break;
case Maxwell::VertexAttribute::Type::Float:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_16:
@@ -87,45 +89,13 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return GL_FLOAT;
default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- return {};
+ break;
}
- case Maxwell::VertexAttribute::Type::UnsignedScaled:
- switch (attrib.size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- case Maxwell::VertexAttribute::Size::Size_8_8:
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return GL_UNSIGNED_BYTE;
- case Maxwell::VertexAttribute::Size::Size_16:
- case Maxwell::VertexAttribute::Size::Size_16_16:
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return GL_UNSIGNED_SHORT;
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- return {};
- }
- case Maxwell::VertexAttribute::Type::SignedScaled:
- switch (attrib.size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- case Maxwell::VertexAttribute::Size::Size_8_8:
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return GL_BYTE;
- case Maxwell::VertexAttribute::Size::Size_16:
- case Maxwell::VertexAttribute::Size::Size_16_16:
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return GL_SHORT;
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- return {};
- }
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
- return {};
+ break;
}
+ UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", attrib.TypeString(),
+ attrib.SizeString());
+ return {};
}
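
VertexFormat can fold the Scaled types into the Norm/Int component paths because GL names the component type with the same enum; only the attribute-format call distinguishes integer from float interpretation. A hedged consumer sketch (ComponentCount and IsNormalized are assumed helpers on the attribute):

    // Sketch: wiring VertexFormat into DSA vertex attribute setup.
    void SetAttribute(GLuint vao, GLuint index, Maxwell::VertexAttribute attrib,
                      GLuint offset) {
        const GLenum format = MaxwellToGL::VertexFormat(attrib);
        if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt ||
            attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) {
            glVertexArrayAttribIFormat(vao, index, attrib.ComponentCount(), format,
                                       offset);
        } else {
            glVertexArrayAttribFormat(vao, index, attrib.ComponentCount(), format,
                                      attrib.IsNormalized() ? GL_TRUE : GL_FALSE,
                                      offset);
        }
    }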
inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
@@ -137,8 +107,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
case Maxwell::IndexFormat::UnsignedInt:
return GL_UNSIGNED_INT;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format));
- UNREACHABLE();
+ UNREACHABLE_MSG("Invalid index_format={}", static_cast<u32>(index_format));
return {};
}
@@ -180,33 +149,32 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
}
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
- Tegra::Texture::TextureMipmapFilter mip_filter_mode) {
+ Tegra::Texture::TextureMipmapFilter mipmap_filter_mode) {
switch (filter_mode) {
- case Tegra::Texture::TextureFilter::Linear: {
- switch (mip_filter_mode) {
+ case Tegra::Texture::TextureFilter::Nearest:
+ switch (mipmap_filter_mode) {
case Tegra::Texture::TextureMipmapFilter::None:
- return GL_LINEAR;
+ return GL_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Nearest:
- return GL_LINEAR_MIPMAP_NEAREST;
+ return GL_NEAREST_MIPMAP_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Linear:
- return GL_LINEAR_MIPMAP_LINEAR;
+ return GL_NEAREST_MIPMAP_LINEAR;
}
break;
- }
- case Tegra::Texture::TextureFilter::Nearest: {
- switch (mip_filter_mode) {
+ case Tegra::Texture::TextureFilter::Linear:
+ switch (mipmap_filter_mode) {
case Tegra::Texture::TextureMipmapFilter::None:
- return GL_NEAREST;
+ return GL_LINEAR;
case Tegra::Texture::TextureMipmapFilter::Nearest:
- return GL_NEAREST_MIPMAP_NEAREST;
+ return GL_LINEAR_MIPMAP_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Linear:
- return GL_NEAREST_MIPMAP_LINEAR;
+ return GL_LINEAR_MIPMAP_LINEAR;
}
break;
}
- }
- LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode));
- return GL_LINEAR;
+ UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}",
+ static_cast<u32>(filter_mode), static_cast<u32>(mipmap_filter_mode));
+ return GL_NEAREST;
}
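
The two switches compose the texture filter with the mipmap mode into a single GL minification enum. A short usage sketch for sampler objects (illustrative; magnification never samples mipmaps, so None is passed for its mip filter):

    // Sketch: applying the translated filters to a sampler object.
    void ApplyFilters(GLuint sampler, Tegra::Texture::TextureFilter mag,
                      Tegra::Texture::TextureFilter min,
                      Tegra::Texture::TextureMipmapFilter mip) {
        using MaxwellToGL::TextureFilterMode;
        glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER,
                            TextureFilterMode(mag, Tegra::Texture::TextureMipmapFilter::None));
        glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, TextureFilterMode(min, mip));
    }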
inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
@@ -229,10 +197,15 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
} else {
return GL_MIRROR_CLAMP_TO_EDGE;
}
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
- return GL_REPEAT;
+ case Tegra::Texture::WrapMode::MirrorOnceClampOGL:
+ if (GL_EXT_texture_mirror_clamp) {
+ return GL_MIRROR_CLAMP_EXT;
+ } else {
+ return GL_MIRROR_CLAMP_TO_EDGE;
+ }
}
+ UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
+ return GL_REPEAT;
}
inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
@@ -254,8 +227,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
case Tegra::Texture::DepthCompareFunc::Always:
return GL_ALWAYS;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}",
- static_cast<u32>(func));
+ UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast<u32>(func));
return GL_GREATER;
}
@@ -277,7 +249,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
case Maxwell::Blend::Equation::MaxGL:
return GL_MAX;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
+ UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
return GL_FUNC_ADD;
}
@@ -341,7 +313,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
return GL_ONE_MINUS_CONSTANT_ALPHA;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
+ UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
return GL_ZERO;
}
@@ -361,7 +333,7 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
case Tegra::Texture::SwizzleSource::OneFloat:
return GL_ONE;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source));
+ UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(source));
return GL_ZERO;
}
@@ -392,7 +364,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
case Maxwell::ComparisonOp::AlwaysOld:
return GL_ALWAYS;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison));
+ UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
return GL_ALWAYS;
}
@@ -423,7 +395,7 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
case Maxwell::StencilOp::DecrWrapOGL:
return GL_DECR_WRAP;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil));
+ UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil));
return GL_KEEP;
}
@@ -434,7 +406,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) {
case Maxwell::FrontFace::CounterClockWise:
return GL_CCW;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
+ UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast<u32>(front_face));
return GL_CCW;
}
@@ -447,7 +419,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) {
case Maxwell::CullFace::FrontAndBack:
return GL_FRONT_AND_BACK;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));
+ UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
return GL_BACK;
}
@@ -486,7 +458,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
case Maxwell::LogicOperation::Set:
return GL_SET;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation));
+ UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(operation));
return GL_COPY;
}
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index e7952924a..2ccca1993 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -21,6 +21,8 @@
#include "core/perf_stats.h"
#include "core/settings.h"
#include "core/telemetry_session.h"
+#include "video_core/host_shaders/opengl_present_frag.h"
+#include "video_core/host_shaders/opengl_present_vert.h"
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -30,60 +32,6 @@ namespace OpenGL {
namespace {
-constexpr std::size_t SWAP_CHAIN_SIZE = 3;
-
-struct Frame {
- u32 width{}; /// Width of the frame (to detect resize)
- u32 height{}; /// Height of the frame
- bool color_reloaded{}; /// Texture attachment was recreated (ie: resized)
- OpenGL::OGLRenderbuffer color{}; /// Buffer shared between the render/present FBO
- OpenGL::OGLFramebuffer render{}; /// FBO created on the render thread
- OpenGL::OGLFramebuffer present{}; /// FBO created on the present thread
- GLsync render_fence{}; /// Fence created on the render thread
- GLsync present_fence{}; /// Fence created on the presentation thread
- bool is_srgb{}; /// Framebuffer is sRGB or RGB
-};
-
-constexpr char VERTEX_SHADER[] = R"(
-#version 430 core
-
-out gl_PerVertex {
- vec4 gl_Position;
-};
-
-layout (location = 0) in vec2 vert_position;
-layout (location = 1) in vec2 vert_tex_coord;
-layout (location = 0) out vec2 frag_tex_coord;
-
-// This is a truncated 3x3 matrix for 2D transformations:
-// The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
-// The third column performs translation.
-// The third row could be used for projection, which we don't need in 2D. It hence is assumed to
-// implicitly be [0, 0, 1]
-layout (location = 0) uniform mat3x2 modelview_matrix;
-
-void main() {
- // Multiply input position by the rotscale part of the matrix and then manually translate by
- // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
- // to `vec3(vert_position.xy, 1.0)`
- gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0);
- frag_tex_coord = vert_tex_coord;
-}
-)";
-
-constexpr char FRAGMENT_SHADER[] = R"(
-#version 430 core
-
-layout (location = 0) in vec2 frag_tex_coord;
-layout (location = 0) out vec4 color;
-
-layout (binding = 0) uniform sampler2D color_texture;
-
-void main() {
- color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
-}
-)";
-
constexpr GLint PositionLocation = 0;
constexpr GLint TexCoordLocation = 1;
constexpr GLint ModelViewMatrixLocation = 0;
@@ -96,24 +44,6 @@ struct ScreenRectVertex {
std::array<GLfloat, 2> tex_coord;
};
-/// Returns true if any debug tool is attached
-bool HasDebugTool() {
- const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
- if (nsight) {
- return true;
- }
-
- GLint num_extensions;
- glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
- for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) {
- const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index));
- if (!std::strcmp(name, "GL_EXT_debug_tool")) {
- return true;
- }
- }
- return false;
-}
-
/**
 * Defines a 1:1 pixel orthographic projection matrix with (0,0) at the top-left
 * corner and (width, height) at the bottom-right.
@@ -197,132 +127,15 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
} // Anonymous namespace
-/**
- * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
- * but also make sure that rendering happens at the pace that the frontend dictates. This is a
- * helper class that the renderer uses to sync frames between the render thread and the presentation
- * thread
- */
-class FrameMailbox {
-public:
- std::mutex swap_chain_lock;
- std::condition_variable present_cv;
- std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
- std::queue<Frame*> free_queue;
- std::deque<Frame*> present_queue;
- Frame* previous_frame{};
-
- FrameMailbox() {
- for (auto& frame : swap_chain) {
- free_queue.push(&frame);
- }
- }
-
- ~FrameMailbox() {
- // lock the mutex and clear out the present and free_queues and notify any people who are
- // blocked to prevent deadlock on shutdown
- std::scoped_lock lock{swap_chain_lock};
- std::queue<Frame*>().swap(free_queue);
- present_queue.clear();
- present_cv.notify_all();
- }
-
- void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
- frame->present.Release();
- frame->present.Create();
- GLint previous_draw_fbo{};
- glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
- glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
- glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
- frame->color.handle);
- if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
- LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
- }
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
- frame->color_reloaded = false;
- }
-
- void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
- // Recreate the color texture attachment
- frame->color.Release();
- frame->color.Create();
- const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
- glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
-
- // Recreate the FBO for the render target
- frame->render.Release();
- frame->render.Create();
- glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
- glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
- frame->color.handle);
- if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
- LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
- }
-
- frame->width = width;
- frame->height = height;
- frame->color_reloaded = true;
- }
-
- Frame* GetRenderFrame() {
- std::unique_lock lock{swap_chain_lock};
-
- // If there are no free frames, we will reuse the oldest render frame
- if (free_queue.empty()) {
- auto frame = present_queue.back();
- present_queue.pop_back();
- return frame;
- }
-
- Frame* frame = free_queue.front();
- free_queue.pop();
- return frame;
- }
-
- void ReleaseRenderFrame(Frame* frame) {
- std::unique_lock lock{swap_chain_lock};
- present_queue.push_front(frame);
- present_cv.notify_one();
- }
-
- Frame* TryGetPresentFrame(int timeout_ms) {
- std::unique_lock lock{swap_chain_lock};
- // wait for new entries in the present_queue
- present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
- [&] { return !present_queue.empty(); });
- if (present_queue.empty()) {
- // timed out waiting for a frame to draw so return the previous frame
- return previous_frame;
- }
-
- // free the previous frame and add it back to the free queue
- if (previous_frame) {
- free_queue.push(previous_frame);
- }
-
- // the newest entries are pushed to the front of the queue
- Frame* frame = present_queue.front();
- present_queue.pop_front();
- // remove all old entries from the present queue and move them back to the free_queue
- for (auto f : present_queue) {
- free_queue.push(f);
- }
- present_queue.clear();
- previous_frame = frame;
- return frame;
- }
-};
-
-RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
- Core::Frontend::GraphicsContext& context)
- : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
- program_manager{device}, has_debug_tool{HasDebugTool()} {}
+RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
+ Core::Frontend::EmuWindow& emu_window_,
+ Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context)
+ : RendererBase{emu_window_, std::move(context)}, telemetry_session{telemetry_session_},
+ emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {}
RendererOpenGL::~RendererOpenGL() = default;
-MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 128, 64));
-MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128));
-
void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (!framebuffer) {
return;
@@ -331,79 +144,34 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
PrepareRendertarget(framebuffer);
RenderScreenshot();
- Frame* frame;
- {
- MICROPROFILE_SCOPE(OpenGL_WaitPresent);
-
- frame = frame_mailbox->GetRenderFrame();
-
- // Clean up sync objects before drawing
-
- // INTEL driver workaround. We can't delete the previous render sync object until we are
- // sure that the presentation is done
- if (frame->present_fence) {
- glClientWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED);
- }
-
- // delete the draw fence if the frame wasn't presented
- if (frame->render_fence) {
- glDeleteSync(frame->render_fence);
- frame->render_fence = 0;
- }
-
- // wait for the presentation to be done
- if (frame->present_fence) {
- glWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED);
- glDeleteSync(frame->present_fence);
- frame->present_fence = 0;
- }
- }
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+ DrawScreen(emu_window.GetFramebufferLayout());
- {
- MICROPROFILE_SCOPE(OpenGL_RenderFrame);
- const auto& layout = render_window.GetFramebufferLayout();
+ ++m_current_frame;
- // Recreate the frame if the size of the window has changed
- if (layout.width != frame->width || layout.height != frame->height ||
- screen_info.display_srgb != frame->is_srgb) {
- LOG_DEBUG(Render_OpenGL, "Reloading render frame");
- frame->is_srgb = screen_info.display_srgb;
- frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height);
- }
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frame->render.handle);
- DrawScreen(layout);
- // Create a fence for the frontend to wait on and swap this frame to OffTex
- frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
- glFlush();
- frame_mailbox->ReleaseRenderFrame(frame);
- m_current_frame++;
- rasterizer->TickFrame();
- }
+ rasterizer->TickFrame();
render_window.PollEvents();
- if (has_debug_tool) {
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
- Present(0);
- context.SwapBuffers();
- }
+ context->SwapBuffers();
}
void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
- if (framebuffer) {
- // If framebuffer is provided, reload it from memory to a texture
- if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
- screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
- screen_info.texture.pixel_format != framebuffer->pixel_format ||
- gl_framebuffer_data.empty()) {
- // Reallocate texture if the framebuffer size has changed.
- // This is expected to not happen very often and hence should not be a
- // performance problem.
- ConfigureFramebufferTexture(screen_info.texture, *framebuffer);
- }
-
- // Load the framebuffer from memory, draw it to the screen, and swap buffers
- LoadFBToScreenInfo(*framebuffer);
+ if (!framebuffer) {
+ return;
+ }
+ // If framebuffer is provided, reload it from memory to a texture
+ if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
+ screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
+ screen_info.texture.pixel_format != framebuffer->pixel_format ||
+ gl_framebuffer_data.empty()) {
+ // Reallocate texture if the framebuffer size has changed.
+ // This is expected to not happen very often and hence should not be a
+ // performance problem.
+ ConfigureFramebufferTexture(screen_info.texture, *framebuffer);
}
+
+ // Load the framebuffer from memory, draw it to the screen, and swap buffers
+ LoadFBToScreenInfo(*framebuffer);
}
void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
@@ -423,7 +191,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
- u8* const host_ptr{system.Memory().GetPointer(framebuffer_addr)};
+ u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
// TODO(Rodrigo): Read this from HLE
@@ -453,17 +221,15 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
}
void RendererOpenGL::InitOpenGLObjects() {
- frame_mailbox = std::make_unique<FrameMailbox>();
-
- glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
- 0.0f);
+ glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
+ Settings::values.bg_blue.GetValue(), 0.0f);
// Create shader programs
OGLShader vertex_shader;
- vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER);
+ vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
OGLShader fragment_shader;
- fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER);
+ fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
vertex_program.Create(true, false, vertex_shader.handle);
fragment_program.Create(true, false, fragment_shader.handle);
@@ -488,6 +254,15 @@ void RendererOpenGL::InitOpenGLObjects() {
// Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
+
+ // Enable unified vertex attributes and query vertex buffer address when the driver supports it
+ if (device.HasVertexBufferUnifiedMemory()) {
+ glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
+
+ glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
+ glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
+ &vertex_buffer_address);
+ }
}
void RendererOpenGL::AddTelemetryFields() {
@@ -499,18 +274,18 @@ void RendererOpenGL::AddTelemetryFields() {
LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
- auto& telemetry_session = system.TelemetrySession();
- telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
- telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
- telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
+ constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
+ telemetry_session.AddField(user_system, "GPU_Vendor", gpu_vendor);
+ telemetry_session.AddField(user_system, "GPU_Model", gpu_model);
+ telemetry_session.AddField(user_system, "GPU_OpenGL_Version", gl_version);
}
void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
}
- rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info,
- program_manager, state_tracker);
+ rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
+ screen_info, program_manager, state_tracker);
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
@@ -526,12 +301,12 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
GLint internal_format;
switch (framebuffer.pixel_format) {
- case Tegra::FramebufferConfig::PixelFormat::ABGR8:
+ case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
break;
- case Tegra::FramebufferConfig::PixelFormat::RGB565:
+ case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
internal_format = GL_RGB565;
texture.gl_format = GL_RGB;
texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
@@ -552,8 +327,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
if (renderer_settings.set_background_color) {
// Update background color before drawing
- glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
- 0.0f);
+ glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
+ Settings::values.bg_blue.GetValue(), 0.0f);
}
// Set projection matrix
@@ -656,7 +431,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
offsetof(ScreenRectVertex, tex_coord));
glVertexAttribBinding(PositionLocation, 0);
glVertexAttribBinding(TexCoordLocation, 0);
- glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
+ if (device.HasVertexBufferUnifiedMemory()) {
+ glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
+ glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
+ sizeof(vertices));
+ } else {
+ glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
+ }
glBindTextureUnit(0, screen_info.display_texture);
glBindSampler(0, 0);
@@ -667,51 +448,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
program_manager.RestoreGuestPipeline();
}
-bool RendererOpenGL::TryPresent(int timeout_ms) {
- if (has_debug_tool) {
- LOG_DEBUG(Render_OpenGL,
- "Skipping presentation because we are presenting on the main context");
- return false;
- }
- return Present(timeout_ms);
-}
-
-bool RendererOpenGL::Present(int timeout_ms) {
- const auto& layout = render_window.GetFramebufferLayout();
- auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms);
- if (!frame) {
- LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present");
- return false;
- }
-
- // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a
- // readback since we won't be doing any blending
- glClear(GL_COLOR_BUFFER_BIT);
-
- // Recreate the presentation FBO if the color attachment was changed
- if (frame->color_reloaded) {
- LOG_DEBUG(Render_OpenGL, "Reloading present frame");
- frame_mailbox->ReloadPresentFrame(frame, layout.width, layout.height);
- }
- glWaitSync(frame->render_fence, 0, GL_TIMEOUT_IGNORED);
- // INTEL workaround.
- // Normally we could just delete the draw fence here, but due to driver bugs, we can just delete
- // it on the emulation thread without too much penalty
- // glDeleteSync(frame.render_sync);
- // frame.render_sync = 0;
-
- glBindFramebuffer(GL_READ_FRAMEBUFFER, frame->present.handle);
- glBlitFramebuffer(0, 0, frame->width, frame->height, 0, 0, layout.width, layout.height,
- GL_COLOR_BUFFER_BIT, GL_LINEAR);
-
- // Insert fence for the main thread to block on
- frame->present_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
- glFlush();
-
- glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
- return true;
-}
-
void RendererOpenGL::RenderScreenshot() {
if (!renderer_settings.screenshot_requested) {
return;
@@ -726,7 +462,7 @@ void RendererOpenGL::RenderScreenshot() {
screenshot_framebuffer.Create();
glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle);
- Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
+ const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
GLuint renderbuffer;
glGenRenderbuffers(1, &renderbuffer);
@@ -751,11 +487,9 @@ void RendererOpenGL::RenderScreenshot() {
}
bool RendererOpenGL::Init() {
- if (GLAD_GL_KHR_debug) {
+ if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
- if (Settings::values.renderer_debug) {
- glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
- }
+ glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
glDebugMessageCallback(DebugHandler, nullptr);
}
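
Debug output is now enabled only when the user opts in and the driver exposes KHR_debug; the synchronous flag keeps callback delivery on the erroring thread. The DebugHandler registered here is defined earlier in the file, outside this hunk; a minimal callback with the signature glDebugMessageCallback expects could look like this (assuming glad and <cstdio> are available):

    // Sketch: minimal KHR_debug callback (illustrative; the file's real
    // DebugHandler maps source/type/severity onto the logger instead).
    void APIENTRY MinimalDebugHandler(GLenum source, GLenum type, GLuint id,
                                      GLenum severity, GLsizei length,
                                      const GLchar* message, const void* user_data) {
        std::fprintf(stderr, "GL debug [id=%u]: %.*s\n", id,
                     static_cast<int>(length), message);
    }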
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 61bf507f4..9ef181f95 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -16,16 +16,25 @@
namespace Core {
class System;
-}
+class TelemetrySession;
+} // namespace Core
namespace Core::Frontend {
class EmuWindow;
}
+namespace Core::Memory {
+class Memory;
+}
+
namespace Layout {
struct FramebufferLayout;
}
+namespace Tegra {
+class GPU;
+}
+
namespace OpenGL {
/// Structure used for storing information about the textures for the Switch screen
@@ -46,24 +55,17 @@ struct ScreenInfo {
TextureInfo texture;
};
-struct PresentationTexture {
- u32 width = 0;
- u32 height = 0;
- OGLTexture texture;
-};
-
-class FrameMailbox;
-
class RendererOpenGL final : public VideoCore::RendererBase {
public:
- explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
- Core::Frontend::GraphicsContext& context);
+ explicit RendererOpenGL(Core::TelemetrySession& telemetry_session,
+ Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory,
+ Tegra::GPU& gpu,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context);
~RendererOpenGL() override;
bool Init() override;
void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
- bool TryPresent(int timeout_ms) override;
private:
/// Initializes the OpenGL state and creates persistent objects.
@@ -91,14 +93,13 @@ private:
void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer);
- bool Present(int timeout_ms);
-
+ Core::TelemetrySession& telemetry_session;
Core::Frontend::EmuWindow& emu_window;
- Core::System& system;
- Core::Frontend::GraphicsContext& context;
- const Device device;
+ Core::Memory::Memory& cpu_memory;
+ Tegra::GPU& gpu;
- StateTracker state_tracker{system};
+ const Device device;
+ StateTracker state_tracker{gpu};
// OpenGL object IDs
OGLBuffer vertex_buffer;
@@ -107,6 +108,9 @@ private:
OGLPipeline pipeline;
OGLFramebuffer screenshot_framebuffer;
+ // GPU address of the vertex buffer
+ GLuint64EXT vertex_buffer_address = 0;
+
/// Display information for Switch screen
ScreenInfo screen_info;
@@ -117,13 +121,8 @@ private:
std::vector<u8> gl_framebuffer_data;
/// Used for transforming the framebuffer orientation
- Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
+ Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags{};
Common::Rectangle<int> framebuffer_crop_rect;
-
- /// Frame presentation mailbox
- std::unique_ptr<FrameMailbox> frame_mailbox;
-
- bool has_debug_tool = false;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 424278816..81a39a3b8 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -39,52 +39,17 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
} // Anonymous namespace
-void FixedPipelineState::DepthStencil::Fill(const Maxwell& regs) noexcept {
- raw = 0;
- front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail));
- front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail));
- front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass));
- front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func));
- if (regs.stencil_two_side_enable) {
- back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail));
- back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail));
- back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass));
- back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func));
- } else {
- back.action_stencil_fail.Assign(front.action_stencil_fail);
- back.action_depth_fail.Assign(front.action_depth_fail);
- back.action_depth_pass.Assign(front.action_depth_pass);
- back.test_func.Assign(front.test_func);
- }
- depth_test_enable.Assign(regs.depth_test_enable);
- depth_write_enable.Assign(regs.depth_write_enabled);
- depth_bounds_enable.Assign(regs.depth_bounds_enable);
- stencil_enable.Assign(regs.stencil_enable);
- depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
-}
-
-void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
- const auto& clip = regs.view_volume_clip_control;
+void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) {
const std::array enabled_lut = {regs.polygon_offset_point_enable,
regs.polygon_offset_line_enable,
regs.polygon_offset_fill_enable};
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
- u32 packed_front_face = PackFrontFace(regs.front_face);
- if (regs.screen_y_control.triangle_rast_flip != 0) {
- // Flip front face
- packed_front_face = 1 - packed_front_face;
- }
-
raw = 0;
- topology.Assign(topology_index);
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
- cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0);
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value());
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
- cull_face.Assign(PackCullFace(regs.cull_face));
- front_face.Assign(packed_front_face);
polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));
patch_control_points_minus_one.Assign(regs.patch_vertices - 1);
tessellation_primitive.Assign(static_cast<u32>(regs.tess_mode.prim.Value()));
@@ -93,19 +58,37 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
logic_op.Assign(PackLogicOp(regs.logic_op.operation));
rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
+
std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast
-}
-void FixedPipelineState::ColorBlending::Fill(const Maxwell& regs) noexcept {
+ for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ binding_divisors[index] =
+ regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0;
+ }
+
+ for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+ const auto& input = regs.vertex_attrib_format[index];
+ auto& attribute = attributes[index];
+ attribute.raw = 0;
+ attribute.enabled.Assign(input.IsConstant() ? 0 : 1);
+ attribute.buffer.Assign(input.buffer);
+ attribute.offset.Assign(input.offset);
+ attribute.type.Assign(static_cast<u32>(input.type.Value()));
+ attribute.size.Assign(static_cast<u32>(input.size.Value()));
+ }
+
for (std::size_t index = 0; index < std::size(attachments); ++index) {
attachments[index].Fill(regs, index);
}
-}
-void FixedPipelineState::ViewportSwizzles::Fill(const Maxwell& regs) noexcept {
const auto& transform = regs.viewport_transform;
- std::transform(transform.begin(), transform.end(), swizzles.begin(),
+ std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(),
[](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); });
+
+ if (!has_extended_dynamic_state) {
+ no_extended_dynamic_state.Assign(1);
+ dynamic_state.Fill(regs);
+ }
}
void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) {
@@ -147,20 +130,57 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
enable.Assign(1);
}
-void FixedPipelineState::Fill(const Maxwell& regs) {
- rasterizer.Fill(regs);
- depth_stencil.Fill(regs);
- color_blending.Fill(regs);
- viewport_swizzles.Fill(regs);
+void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
+ const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
+ u32 packed_front_face = PackFrontFace(regs.front_face);
+ if (regs.screen_y_control.triangle_rast_flip != 0) {
+ // Flip front face
+ packed_front_face = 1 - packed_front_face;
+ }
+
+ raw1 = 0;
+ raw2 = 0;
+ front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail));
+ front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail));
+ front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass));
+ front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func));
+ if (regs.stencil_two_side_enable) {
+ back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail));
+ back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail));
+ back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass));
+ back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func));
+ } else {
+ back.action_stencil_fail.Assign(front.action_stencil_fail);
+ back.action_depth_fail.Assign(front.action_depth_fail);
+ back.action_depth_pass.Assign(front.action_depth_pass);
+ back.test_func.Assign(front.test_func);
+ }
+ stencil_enable.Assign(regs.stencil_enable);
+ depth_write_enable.Assign(regs.depth_write_enabled);
+ depth_bounds_enable.Assign(regs.depth_bounds_enable);
+ depth_test_enable.Assign(regs.depth_test_enable);
+ front_face.Assign(packed_front_face);
+ depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
+ topology.Assign(topology_index);
+ cull_face.Assign(PackCullFace(regs.cull_face));
+ cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0);
+
+ for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ const auto& input = regs.vertex_array[index];
+ VertexBinding& binding = vertex_bindings[index];
+ binding.raw = 0;
+ binding.enabled.Assign(input.IsEnabled() ? 1 : 0);
+ binding.stride.Assign(static_cast<u16>(input.stride.Value()));
+ }
}
std::size_t FixedPipelineState::Hash() const noexcept {
- const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+ const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
return static_cast<std::size_t>(hash);
}
bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
- return std::memcmp(this, &rhs, sizeof *this) == 0;
+ return std::memcmp(this, &rhs, Size()) == 0;
}
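
Hash() and operator== now cover only Size() bytes, so when VK_EXT_extended_dynamic_state is available the trailing DynamicState block, which is set at draw time instead, drops out of pipeline lookups. A sketch of a Size() consistent with that, assuming the dynamic state is the struct's final member:

    // Sketch: byte count participating in hashing/comparison (assumes the
    // DynamicState block sits at the end of FixedPipelineState).
    std::size_t Size() const noexcept {
        const std::size_t total = sizeof(FixedPipelineState);
        return no_extended_dynamic_state != 0 ? total : total - sizeof(DynamicState);
    }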
u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 31a6398f2..cdcbb65f5 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -60,14 +60,6 @@ struct FixedPipelineState {
void Fill(const Maxwell& regs, std::size_t index);
- std::size_t Hash() const noexcept;
-
- bool operator==(const BlendingAttachment& rhs) const noexcept;
-
- bool operator!=(const BlendingAttachment& rhs) const noexcept {
- return !operator==(rhs);
- }
-
constexpr std::array<bool, 4> Mask() const noexcept {
return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
}
@@ -97,156 +89,116 @@ struct FixedPipelineState {
}
};
- struct VertexInput {
- union Binding {
- u16 raw;
- BitField<0, 1, u16> enabled;
- BitField<1, 12, u16> stride;
- };
+ union VertexAttribute {
+ u32 raw;
+ BitField<0, 1, u32> enabled;
+ BitField<1, 5, u32> buffer;
+ BitField<6, 14, u32> offset;
+ BitField<20, 3, u32> type;
+ BitField<23, 6, u32> size;
- union Attribute {
- u32 raw;
- BitField<0, 1, u32> enabled;
- BitField<1, 5, u32> buffer;
- BitField<6, 14, u32> offset;
- BitField<20, 3, u32> type;
- BitField<23, 6, u32> size;
-
- constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
- return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
- }
-
- constexpr Maxwell::VertexAttribute::Size Size() const noexcept {
- return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
- }
- };
-
- std::array<Binding, Maxwell::NumVertexArrays> bindings;
- std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
- std::array<Attribute, Maxwell::NumVertexAttributes> attributes;
-
- void SetBinding(std::size_t index, bool enabled, u32 stride, u32 divisor) noexcept {
- auto& binding = bindings[index];
- binding.raw = 0;
- binding.enabled.Assign(enabled ? 1 : 0);
- binding.stride.Assign(static_cast<u16>(stride));
- binding_divisors[index] = divisor;
+ constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
+ return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
}
- void SetAttribute(std::size_t index, bool enabled, u32 buffer, u32 offset,
- Maxwell::VertexAttribute::Type type,
- Maxwell::VertexAttribute::Size size) noexcept {
- auto& attribute = attributes[index];
- attribute.raw = 0;
- attribute.enabled.Assign(enabled ? 1 : 0);
- attribute.buffer.Assign(buffer);
- attribute.offset.Assign(offset);
- attribute.type.Assign(static_cast<u32>(type));
- attribute.size.Assign(static_cast<u32>(size));
+ constexpr Maxwell::VertexAttribute::Size Size() const noexcept {
+ return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
}
};
- struct Rasterizer {
- union {
- u32 raw;
- BitField<0, 4, u32> topology;
- BitField<4, 1, u32> primitive_restart_enable;
- BitField<5, 1, u32> cull_enable;
- BitField<6, 1, u32> depth_bias_enable;
- BitField<7, 1, u32> depth_clamp_disabled;
- BitField<8, 1, u32> ndc_minus_one_to_one;
- BitField<9, 2, u32> cull_face;
- BitField<11, 1, u32> front_face;
- BitField<12, 2, u32> polygon_mode;
- BitField<14, 5, u32> patch_control_points_minus_one;
- BitField<19, 2, u32> tessellation_primitive;
- BitField<21, 2, u32> tessellation_spacing;
- BitField<23, 1, u32> tessellation_clockwise;
- BitField<24, 1, u32> logic_op_enable;
- BitField<25, 4, u32> logic_op;
- BitField<29, 1, u32> rasterize_enable;
- };
-
- // TODO(Rodrigo): Move this to push constants
- u32 point_size;
+ template <std::size_t Position>
+ union StencilFace {
+ BitField<Position + 0, 3, u32> action_stencil_fail;
+ BitField<Position + 3, 3, u32> action_depth_fail;
+ BitField<Position + 6, 3, u32> action_depth_pass;
+ BitField<Position + 9, 3, u32> test_func;
- void Fill(const Maxwell& regs) noexcept;
+ Maxwell::StencilOp ActionStencilFail() const noexcept {
+ return UnpackStencilOp(action_stencil_fail);
+ }
- constexpr Maxwell::PrimitiveTopology Topology() const noexcept {
- return static_cast<Maxwell::PrimitiveTopology>(topology.Value());
+ Maxwell::StencilOp ActionDepthFail() const noexcept {
+ return UnpackStencilOp(action_depth_fail);
}
- Maxwell::CullFace CullFace() const noexcept {
- return UnpackCullFace(cull_face.Value());
+ Maxwell::StencilOp ActionDepthPass() const noexcept {
+ return UnpackStencilOp(action_depth_pass);
}
- Maxwell::FrontFace FrontFace() const noexcept {
- return UnpackFrontFace(front_face.Value());
+ Maxwell::ComparisonOp TestFunc() const noexcept {
+ return UnpackComparisonOp(test_func);
}
};
- struct DepthStencil {
- template <std::size_t Position>
- union StencilFace {
- BitField<Position + 0, 3, u32> action_stencil_fail;
- BitField<Position + 3, 3, u32> action_depth_fail;
- BitField<Position + 6, 3, u32> action_depth_pass;
- BitField<Position + 9, 3, u32> test_func;
-
- Maxwell::StencilOp ActionStencilFail() const noexcept {
- return UnpackStencilOp(action_stencil_fail);
- }
-
- Maxwell::StencilOp ActionDepthFail() const noexcept {
- return UnpackStencilOp(action_depth_fail);
- }
-
- Maxwell::StencilOp ActionDepthPass() const noexcept {
- return UnpackStencilOp(action_depth_pass);
- }
-
- Maxwell::ComparisonOp TestFunc() const noexcept {
- return UnpackComparisonOp(test_func);
- }
- };
+ union VertexBinding {
+ u16 raw;
+ BitField<0, 12, u16> stride;
+ BitField<12, 1, u16> enabled;
+ };
+ struct DynamicState {
union {
- u32 raw;
+ u32 raw1;
StencilFace<0> front;
StencilFace<12> back;
- BitField<24, 1, u32> depth_test_enable;
+ BitField<24, 1, u32> stencil_enable;
BitField<25, 1, u32> depth_write_enable;
BitField<26, 1, u32> depth_bounds_enable;
- BitField<27, 1, u32> stencil_enable;
- BitField<28, 3, u32> depth_test_func;
+ BitField<27, 1, u32> depth_test_enable;
+ BitField<28, 1, u32> front_face;
+ BitField<29, 3, u32> depth_test_func;
+ };
+ union {
+ u32 raw2;
+ BitField<0, 4, u32> topology;
+ BitField<4, 2, u32> cull_face;
+ BitField<6, 1, u32> cull_enable;
};
+ std::array<VertexBinding, Maxwell::NumVertexArrays> vertex_bindings;
- void Fill(const Maxwell& regs) noexcept;
+ void Fill(const Maxwell& regs);
Maxwell::ComparisonOp DepthTestFunc() const noexcept {
return UnpackComparisonOp(depth_test_func);
}
- };
-
- struct ColorBlending {
- std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
- void Fill(const Maxwell& regs) noexcept;
- };
+ Maxwell::CullFace CullFace() const noexcept {
+ return UnpackCullFace(cull_face.Value());
+ }
- struct ViewportSwizzles {
- std::array<u16, Maxwell::NumViewports> swizzles;
+ Maxwell::FrontFace FrontFace() const noexcept {
+ return UnpackFrontFace(front_face.Value());
+ }
- void Fill(const Maxwell& regs) noexcept;
+ constexpr Maxwell::PrimitiveTopology Topology() const noexcept {
+ return static_cast<Maxwell::PrimitiveTopology>(topology.Value());
+ }
};
- VertexInput vertex_input;
- Rasterizer rasterizer;
- DepthStencil depth_stencil;
- ColorBlending color_blending;
- ViewportSwizzles viewport_swizzles;
+ union {
+ u32 raw;
+ BitField<0, 1, u32> no_extended_dynamic_state;
+ BitField<2, 1, u32> primitive_restart_enable;
+ BitField<3, 1, u32> depth_bias_enable;
+ BitField<4, 1, u32> depth_clamp_disabled;
+ BitField<5, 1, u32> ndc_minus_one_to_one;
+ BitField<6, 2, u32> polygon_mode;
+ BitField<8, 5, u32> patch_control_points_minus_one;
+ BitField<13, 2, u32> tessellation_primitive;
+ BitField<15, 2, u32> tessellation_spacing;
+ BitField<17, 1, u32> tessellation_clockwise;
+ BitField<18, 1, u32> logic_op_enable;
+ BitField<19, 4, u32> logic_op;
+ BitField<23, 1, u32> rasterize_enable;
+ };
+ u32 point_size;
+ std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
+ std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
+ std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
+ std::array<u16, Maxwell::NumViewports> viewport_swizzles;
+ DynamicState dynamic_state;
- void Fill(const Maxwell& regs);
+ void Fill(const Maxwell& regs, bool has_extended_dynamic_state);
std::size_t Hash() const noexcept;
@@ -255,6 +207,11 @@ struct FixedPipelineState {
bool operator!=(const FixedPipelineState& rhs) const noexcept {
return !operator==(rhs);
}
+
+ std::size_t Size() const noexcept {
+ const std::size_t total_size = sizeof *this;
+ return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState));
+ }
};
static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
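
Size() is what makes the CityHash/memcmp pair above safe to truncate: DynamicState is the last member of FixedPipelineState, so when the device offers extended dynamic state (no_extended_dynamic_state == 0) the trailing block is simply excluded from hashing and comparison. A reduced sketch of the idea, with hypothetical names:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <type_traits>

    struct Trailing {
        std::uint32_t a, b; // state that may instead be set dynamically at draw time
    };

    struct PackedState {
        std::uint32_t fixed_bits;   // always part of the pipeline key
        std::uint32_t use_trailing; // mirrors no_extended_dynamic_state
        Trailing trailing;          // must stay the last member for Size() to work

        std::size_t Size() const noexcept {
            return sizeof *this - (use_trailing != 0 ? 0 : sizeof(Trailing));
        }
        bool Equals(const PackedState& rhs) const noexcept {
            return std::memcmp(this, &rhs, Size()) == 0;
        }
    };
    // memcmp-based keys are only sound when there are no padding bits:
    static_assert(std::has_unique_object_representations_v<PackedState>);

Both operands must agree on use_trailing for Equals() to be meaningful, which holds here because the flag is derived from the host device, not from guest state.
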
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 62e950d31..d22de1d81 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -21,29 +21,29 @@ namespace Sampler {
VkFilter Filter(Tegra::Texture::TextureFilter filter) {
switch (filter) {
- case Tegra::Texture::TextureFilter::Linear:
- return VK_FILTER_LINEAR;
case Tegra::Texture::TextureFilter::Nearest:
return VK_FILTER_NEAREST;
+ case Tegra::Texture::TextureFilter::Linear:
+ return VK_FILTER_LINEAR;
}
- UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
+ UNREACHABLE_MSG("Invalid sampler filter={}", static_cast<u32>(filter));
return {};
}
VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
switch (mipmap_filter) {
case Tegra::Texture::TextureMipmapFilter::None:
- // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
- // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
- // use an image view with a single mipmap level to emulate this.
- return VK_SAMPLER_MIPMAP_MODE_LINEAR;
- ;
- case Tegra::Texture::TextureMipmapFilter::Linear:
- return VK_SAMPLER_MIPMAP_MODE_LINEAR;
+        // There are no Vulkan filter modes that directly correspond to OpenGL minification
+        // filters of GL_LINEAR or GL_NEAREST, but they can be emulated with
+        // VK_SAMPLER_MIPMAP_MODE_NEAREST, minLod = 0, and maxLod = 0.25, combined with
+        // minFilter = VK_FILTER_LINEAR or minFilter = VK_FILTER_NEAREST, respectively.
+ return VK_SAMPLER_MIPMAP_MODE_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Nearest:
return VK_SAMPLER_MIPMAP_MODE_NEAREST;
+ case Tegra::Texture::TextureMipmapFilter::Linear:
+ return VK_SAMPLER_MIPMAP_MODE_LINEAR;
}
- UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
+ UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
return {};
}
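
The None case above leans on a property of Vulkan samplers: with mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, minLod = 0, and maxLod = 0.25, every fetch resolves to mip level 0, which matches GL's non-mipmapped GL_LINEAR/GL_NEAREST behaviour. A hedged sketch of such a sampler; the minLod/maxLod values come from the comment, while the remaining fields are plausible defaults rather than values taken from this change:

    #include <vulkan/vulkan.h>

    // Sketch: a sampler that emulates GL_LINEAR with mipmapping disabled.
    VkSampler MakeNonMipmappedSampler(VkDevice device) {
        VkSamplerCreateInfo ci{};
        ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
        ci.magFilter = VK_FILTER_LINEAR;
        ci.minFilter = VK_FILTER_LINEAR; // VK_FILTER_NEAREST for GL_NEAREST
        ci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
        ci.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
        ci.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
        ci.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
        ci.minLod = 0.0f;
        ci.maxLod = 0.25f; // clamps every fetch to the base level
        VkSampler sampler = VK_NULL_HANDLE;
        vkCreateSampler(device, &ci, nullptr, &sampler);
        return sampler;
    }
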
@@ -118,90 +118,101 @@ struct FormatTuple {
VkFormat format; ///< Vulkan format
int usage = 0; ///< Describes image format usage
} constexpr tex_format_tuples[] = {
- {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // ABGR8U
- {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // ABGR8S
- {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // ABGR8UI
- {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5U
- {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10U
- {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5U (flipped with swizzle)
- {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8U
- {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8UI
- {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // RGBA16F
- {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // RGBA16U
- {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // RGBA16S
- {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // RGBA16UI
- {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // R11FG11FB10F
- {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // RGBA32UI
- {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // DXT1
- {VK_FORMAT_BC2_UNORM_BLOCK}, // DXT23
- {VK_FORMAT_BC3_UNORM_BLOCK}, // DXT45
- {VK_FORMAT_BC4_UNORM_BLOCK}, // DXN1
- {VK_FORMAT_BC5_UNORM_BLOCK}, // DXN2UNORM
- {VK_FORMAT_BC5_SNORM_BLOCK}, // DXN2SNORM
- {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7U
- {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16
- {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16
- {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4
- {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // BGRA8
- {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F
- {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F
- {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F
- {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F
- {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U
- {VK_FORMAT_UNDEFINED}, // R16S
- {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI
- {VK_FORMAT_UNDEFINED}, // R16I
- {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16
- {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F
- {VK_FORMAT_UNDEFINED}, // RG16UI
- {VK_FORMAT_UNDEFINED}, // RG16I
- {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // RG16S
- {VK_FORMAT_UNDEFINED}, // RGB32F
- {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // RGBA8_SRGB
- {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // RG8U
- {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // RG8S
- {VK_FORMAT_R8G8_UINT, Attachable | Storage}, // RG8UI
- {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // RG32UI
- {VK_FORMAT_UNDEFINED}, // RGBX16F
- {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32UI
- {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32I
- {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8
- {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5
- {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4
- {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // BGRA8_SRGB
- {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB
- {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB
- {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB
- {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7U_SRGB
- {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // R4G4B4A4U
- {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB
- {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB
- {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB
- {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB
- {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5
- {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB
- {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8
- {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB
- {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6
- {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB
- {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10
- {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB
- {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12
- {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB
- {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6
- {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB
- {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5
- {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB
- {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9F
+ {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // A8B8G8R8_UNORM
+ {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // A8B8G8R8_SNORM
+ {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT
+ {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT
+ {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM
+ {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM
+ {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM
+ {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
+ {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT
+ {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle)
+ {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM
+ {VK_FORMAT_R8_SNORM, Attachable | Storage}, // R8_SNORM
+ {VK_FORMAT_R8_SINT, Attachable | Storage}, // R8_SINT
+ {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8_UINT
+ {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // R16G16B16A16_FLOAT
+ {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // R16G16B16A16_UNORM
+ {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // R16G16B16A16_SNORM
+ {VK_FORMAT_R16G16B16A16_SINT, Attachable | Storage}, // R16G16B16A16_SINT
+ {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // R16G16B16A16_UINT
+ {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // B10G11R11_FLOAT
+ {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // R32G32B32A32_UINT
+ {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // BC1_RGBA_UNORM
+ {VK_FORMAT_BC2_UNORM_BLOCK}, // BC2_UNORM
+ {VK_FORMAT_BC3_UNORM_BLOCK}, // BC3_UNORM
+ {VK_FORMAT_BC4_UNORM_BLOCK}, // BC4_UNORM
+ {VK_FORMAT_BC4_SNORM_BLOCK}, // BC4_SNORM
+ {VK_FORMAT_BC5_UNORM_BLOCK}, // BC5_UNORM
+ {VK_FORMAT_BC5_SNORM_BLOCK}, // BC5_SNORM
+ {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7_UNORM
+ {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UFLOAT
+ {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SFLOAT
+ {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4_UNORM
+ {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // B8G8R8A8_UNORM
+ {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // R32G32B32A32_FLOAT
+ {VK_FORMAT_R32G32B32A32_SINT, Attachable | Storage}, // R32G32B32A32_SINT
+ {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // R32G32_FLOAT
+ {VK_FORMAT_R32G32_SINT, Attachable | Storage}, // R32G32_SINT
+ {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT
+ {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT
+ {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM
+ {VK_FORMAT_UNDEFINED}, // R16_SNORM
+ {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT
+ {VK_FORMAT_UNDEFINED}, // R16_SINT
+ {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
+ {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT
+ {VK_FORMAT_UNDEFINED}, // R16G16_UINT
+ {VK_FORMAT_UNDEFINED}, // R16G16_SINT
+ {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
+ {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT
+ {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB
+ {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM
+ {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM
+ {VK_FORMAT_R8G8_SINT, Attachable | Storage}, // R8G8_SINT
+ {VK_FORMAT_R8G8_UINT, Attachable | Storage}, // R8G8_UINT
+ {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // R32G32_UINT
+ {VK_FORMAT_UNDEFINED}, // R16G16B16X16_FLOAT
+ {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32_UINT
+ {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT
+ {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM
+ {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5_UNORM
+ {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4_UNORM
+ {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB
+ {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB
+ {VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB
+ {VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB
+ {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7_SRGB
+ {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // A4B4G4R4_UNORM
+ {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB
+ {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB
+ {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB
+ {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB
+ {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5_UNORM
+ {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB
+ {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8_UNORM
+ {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB
+ {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM
+ {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB
+ {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM
+ {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB
+ {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM
+ {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB
+ {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6_UNORM
+ {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB
+ {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5_UNORM
+ {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB
+ {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT
// Depth formats
- {VK_FORMAT_D32_SFLOAT, Attachable}, // Z32F
- {VK_FORMAT_D16_UNORM, Attachable}, // Z16
+ {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT
+ {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM
// DepthStencil formats
- {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // Z24S8
- {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8Z24 (emulated)
- {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // Z32FS8
+ {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // D24_UNORM_S8_UINT
+ {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8_UINT_D24_UNORM (emulated)
+ {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // D32_FLOAT_S8_UINT
};
static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat);
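
The renamed comments double as the table's index: tex_format_tuples is ordered to match VideoCore::Surface::PixelFormat, so a lookup is a single array access, and VK_FORMAT_UNDEFINED entries mark formats that still need host-side emulation. A sketch of how the table might be consumed, assuming the surrounding file's context (GetTuple itself is a hypothetical helper, not part of this change):

    // Sketch: index the tuple table by pixel format; undefined entries need emulation.
    const FormatTuple& GetTuple(VideoCore::Surface::PixelFormat pixel_format) {
        const auto index = static_cast<std::size_t>(pixel_format);
        ASSERT(index < std::size(tex_format_tuples));
        const FormatTuple& tuple = tex_format_tuples[index];
        ASSERT_MSG(tuple.format != VK_FORMAT_UNDEFINED, "Pixel format {} has no host format",
                   index);
        return tuple;
    }
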
@@ -222,7 +233,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo
return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true};
}
- // Use ABGR8 on hardware that doesn't support ASTC natively
+ // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively
if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format)
? VK_FORMAT_A8B8G8R8_SRGB_PACK32
@@ -296,6 +307,30 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
switch (type) {
+ case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
+ return VK_FORMAT_R8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8:
+ return VK_FORMAT_R8G8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ return VK_FORMAT_R8G8B8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return VK_FORMAT_R8G8B8A8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ return VK_FORMAT_R16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return VK_FORMAT_R16G16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ return VK_FORMAT_R16G16B16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return VK_FORMAT_R16G16B16A16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
+ default:
+ break;
+ }
+ break;
case Maxwell::VertexAttribute::Type::SignedNorm:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
@@ -320,56 +355,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
break;
}
break;
- case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ case Maxwell::VertexAttribute::Type::UnsignedScaled:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_UNORM;
+ return VK_FORMAT_R8_USCALED;
case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_UNORM;
+ return VK_FORMAT_R8G8_USCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_UNORM;
+ return VK_FORMAT_R8G8B8_USCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_UNORM;
+ return VK_FORMAT_R8G8B8A8_USCALED;
case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_UNORM;
+ return VK_FORMAT_R16_USCALED;
case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_UNORM;
+ return VK_FORMAT_R16G16_USCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_UNORM;
+ return VK_FORMAT_R16G16B16_USCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_UNORM;
+ return VK_FORMAT_R16G16B16A16_USCALED;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
- return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
+ return VK_FORMAT_A2B10G10R10_USCALED_PACK32;
default:
break;
}
break;
- case Maxwell::VertexAttribute::Type::SignedInt:
+ case Maxwell::VertexAttribute::Type::SignedScaled:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_SINT;
+ return VK_FORMAT_R8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_SINT;
+ return VK_FORMAT_R8G8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_SINT;
+ return VK_FORMAT_R8G8B8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_SINT;
+ return VK_FORMAT_R8G8B8A8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_SINT;
+ return VK_FORMAT_R16_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_SINT;
+ return VK_FORMAT_R16G16_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_SINT;
+ return VK_FORMAT_R16G16B16_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SINT;
- case Maxwell::VertexAttribute::Size::Size_32:
- return VK_FORMAT_R32_SINT;
- case Maxwell::VertexAttribute::Size::Size_32_32:
- return VK_FORMAT_R32G32_SINT;
- case Maxwell::VertexAttribute::Size::Size_32_32_32:
- return VK_FORMAT_R32G32B32_SINT;
- case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
- return VK_FORMAT_R32G32B32A32_SINT;
+ return VK_FORMAT_R16G16B16A16_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_SSCALED_PACK32;
default:
break;
}
@@ -400,56 +429,54 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R32G32B32_UINT;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return VK_FORMAT_R32G32B32A32_UINT;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_UINT_PACK32;
default:
break;
}
break;
- case Maxwell::VertexAttribute::Type::UnsignedScaled:
+ case Maxwell::VertexAttribute::Type::SignedInt:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_USCALED;
+ return VK_FORMAT_R8_SINT;
case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_USCALED;
+ return VK_FORMAT_R8G8_SINT;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_USCALED;
+ return VK_FORMAT_R8G8B8_SINT;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_USCALED;
+ return VK_FORMAT_R8G8B8A8_SINT;
case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_USCALED;
+ return VK_FORMAT_R16_SINT;
case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_USCALED;
+ return VK_FORMAT_R16G16_SINT;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_USCALED;
+ return VK_FORMAT_R16G16B16_SINT;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_USCALED;
+ return VK_FORMAT_R16G16B16A16_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32:
+ return VK_FORMAT_R32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32:
+ return VK_FORMAT_R32G32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32:
+ return VK_FORMAT_R32G32B32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+ return VK_FORMAT_R32G32B32A32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_SINT_PACK32;
default:
break;
}
break;
- case Maxwell::VertexAttribute::Type::SignedScaled:
+ case Maxwell::VertexAttribute::Type::Float:
switch (size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_SSCALED;
+ return VK_FORMAT_R16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_SSCALED;
+ return VK_FORMAT_R16G16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_SSCALED;
+ return VK_FORMAT_R16G16B16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SSCALED;
- default:
- break;
- }
- break;
- case Maxwell::VertexAttribute::Type::Float:
- switch (size) {
+ return VK_FORMAT_R16G16B16A16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32:
return VK_FORMAT_R32_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32_32:
@@ -458,14 +485,6 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R32G32B32_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return VK_FORMAT_R32G32B32A32_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SFLOAT;
default:
break;
}
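
The reordered switches group each Maxwell attribute type with its full set of sizes and add the previously missing 10_10_10_2 packed variants. A caller feeds the packed attribute fields straight through; a sketch of the intended use, where the helper itself is hypothetical and VertexAttribute is the packed union from the header change above:

    // Sketch: turning a packed guest attribute into a Vulkan attribute description.
    VkVertexInputAttributeDescription MakeAttributeDescription(
        u32 location, const FixedPipelineState::VertexAttribute& attribute) {
        return {
            .location = location,
            .binding = attribute.buffer.Value(),
            .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
            .offset = attribute.offset.Value(),
        };
    }
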
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
index 435c8c1b8..5b01020ec 100644
--- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
+++ b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
@@ -65,10 +65,10 @@ bool NsightAftermathTracker::Initialize() {
return false;
}
- dump_dir = FileUtil::GetUserPath(FileUtil::UserPath::LogDir) + "gpucrash";
+ dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash";
- (void)FileUtil::DeleteDirRecursively(dump_dir);
- if (!FileUtil::CreateDir(dump_dir)) {
+ (void)Common::FS::DeleteDirRecursively(dump_dir);
+ if (!Common::FS::CreateDir(dump_dir)) {
LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory");
return false;
}
@@ -106,7 +106,7 @@ void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
return;
}
- FileUtil::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb");
+ Common::FS::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb");
if (!file.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash);
return;
@@ -156,12 +156,12 @@ void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump,
}();
std::string_view dump_view(static_cast<const char*>(gpu_crash_dump), gpu_crash_dump_size);
- if (FileUtil::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) {
+ if (Common::FS::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) {
LOG_ERROR(Render_Vulkan, "Failed to write dump file");
return;
}
const std::string_view json_view(json.data(), json.size());
- if (FileUtil::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) {
+ if (Common::FS::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) {
LOG_ERROR(Render_Vulkan, "Failed to write JSON");
return;
}
@@ -180,7 +180,7 @@ void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_
const std::string path =
fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]);
- FileUtil::IOFile file(path, "wb");
+ Common::FS::IOFile file(path, "wb");
if (!file.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Failed to create file {}", path);
return;
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 59b441943..715182b3b 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -13,6 +13,7 @@
#include <fmt/format.h>
#include "common/dynamic_library.h"
+#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/telemetry.h"
#include "core/core.h"
@@ -24,9 +25,9 @@
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
@@ -55,7 +56,7 @@ VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
VkDebugUtilsMessageTypeFlagsEXT type,
const VkDebugUtilsMessengerCallbackDataEXT* data,
[[maybe_unused]] void* user_data) {
- const char* message{data->pMessage};
+ const char* const message{data->pMessage};
if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
LOG_CRITICAL(Render_Vulkan, "{}", message);
@@ -76,7 +77,8 @@ Common::DynamicLibrary OpenVulkanLibrary() {
char* libvulkan_env = getenv("LIBVULKAN_PATH");
if (!libvulkan_env || !library.Open(libvulkan_env)) {
// Use the libvulkan.dylib from the application bundle.
- std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
+ const std::string filename =
+ Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
library.Open(filename.c_str());
}
#else
@@ -84,7 +86,7 @@ Common::DynamicLibrary OpenVulkanLibrary() {
if (!library.Open(filename.c_str())) {
// Android devices may not have libvulkan.so.1, only libvulkan.so.
filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
- library.Open(filename.c_str());
+ (void)library.Open(filename.c_str());
}
#endif
return library;
@@ -153,11 +155,31 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc
}
}
- static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"};
- vk::Span<const char*> layers = layers_data;
- if (!enable_layers) {
- layers = {};
+ std::vector<const char*> layers;
+ layers.reserve(1);
+ if (enable_layers) {
+ layers.push_back("VK_LAYER_KHRONOS_validation");
+ }
+
+ const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld);
+ if (!layer_properties) {
+ LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers");
+ layers.clear();
+ }
+
+ for (auto layer_it = layers.begin(); layer_it != layers.end();) {
+ const char* const layer = *layer_it;
+ const auto it = std::find_if(
+ layer_properties->begin(), layer_properties->end(),
+ [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); });
+ if (it == layer_properties->end()) {
+ LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer);
+ layer_it = layers.erase(layer_it);
+ } else {
+ ++layer_it;
+ }
}
+
vk::Instance instance = vk::Instance::Create(layers, extensions, dld);
if (!instance) {
LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance");
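
The new layer handling is the standard defensive pattern for optional layers: request VK_LAYER_KHRONOS_validation only when layers are enabled, then drop anything the loader does not actually report, so vkCreateInstance cannot fail on a machine without the Vulkan SDK installed. The same availability check, condensed into a standalone helper that assumes an already-queried property list:

    #include <algorithm>
    #include <cstring>
    #include <vector>
    #include <vulkan/vulkan.h>

    // Sketch: erase requested layers that the Vulkan loader does not expose.
    void RemoveUnavailableLayers(std::vector<const char*>& layers,
                                 const std::vector<VkLayerProperties>& properties) {
        std::erase_if(layers, [&properties](const char* layer) {
            return std::none_of(properties.begin(), properties.end(),
                                [layer](const VkLayerProperties& prop) {
                                    return std::strcmp(layer, prop.layerName) == 0;
                                });
        });
    }
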
@@ -215,8 +237,12 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
} // Anonymous namespace
-RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system)
- : RendererBase(window), system{system} {}
+RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
+ Core::Frontend::EmuWindow& emu_window,
+ Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context)
+ : RendererBase{emu_window, std::move(context)}, telemetry_session{telemetry_session_},
+ cpu_memory{cpu_memory_}, gpu{gpu_} {}
RendererVulkan::~RendererVulkan() {
ShutDown();
@@ -243,11 +269,11 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
scheduler->WaitWorker();
swapchain->AcquireNextImage();
- const auto [fence, render_semaphore] = blit_screen->Draw(*framebuffer, use_accelerated);
+ const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated);
- scheduler->Flush(false, render_semaphore);
+ scheduler->Flush(render_semaphore);
- if (swapchain->Present(render_semaphore, fence)) {
+ if (swapchain->Present(render_semaphore)) {
blit_screen->Recreate();
}
@@ -257,11 +283,6 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
render_window.PollEvents();
}
-bool RendererVulkan::TryPresent(int /*timeout_ms*/) {
- // TODO (bunnei): ImplementMe
- return true;
-}
-
bool RendererVulkan::Init() {
library = OpenVulkanLibrary();
instance = CreateInstance(library, dld, render_window.GetWindowInfo().type,
@@ -274,23 +295,21 @@ bool RendererVulkan::Init() {
memory_manager = std::make_unique<VKMemoryManager>(*device);
- resource_manager = std::make_unique<VKResourceManager>(*device);
+ state_tracker = std::make_unique<StateTracker>(gpu);
+
+ scheduler = std::make_unique<VKScheduler>(*device, *state_tracker);
const auto& framebuffer = render_window.GetFramebufferLayout();
- swapchain = std::make_unique<VKSwapchain>(*surface, *device);
+ swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler);
swapchain->Create(framebuffer.width, framebuffer.height, false);
- state_tracker = std::make_unique<StateTracker>(system);
-
- scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker);
-
- rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device,
- *resource_manager, *memory_manager,
- *state_tracker, *scheduler);
+ rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(),
+ cpu_memory, screen_info, *device,
+ *memory_manager, *state_tracker, *scheduler);
- blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device,
- *resource_manager, *memory_manager, *swapchain,
- *scheduler, screen_info);
+ blit_screen =
+ std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
+ *memory_manager, *swapchain, *scheduler, screen_info);
return true;
}
@@ -308,7 +327,6 @@ void RendererVulkan::ShutDown() {
scheduler.reset();
swapchain.reset();
memory_manager.reset();
- resource_manager.reset();
device.reset();
}
@@ -387,7 +405,7 @@ bool RendererVulkan::PickDevices() {
return false;
}
- const s32 device_index = Settings::values.vulkan_device;
+ const s32 device_index = Settings::values.vulkan_device.GetValue();
if (device_index < 0 || device_index >= static_cast<s32>(devices->size())) {
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
return false;
@@ -416,8 +434,7 @@ void RendererVulkan::Report() const {
LOG_INFO(Render_Vulkan, "Device: {}", model_name);
LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
- auto& telemetry_session = system.TelemetrySession();
- constexpr auto field = Telemetry::FieldType::UserSystem;
+ static constexpr auto field = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
telemetry_session.AddField(field, "GPU_Model", model_name);
telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name);
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 522b5bff8..49a4141ec 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -14,7 +14,15 @@
#include "video_core/renderer_vulkan/wrapper.h"
namespace Core {
-class System;
+class TelemetrySession;
+}
+
+namespace Core::Memory {
+class Memory;
+}
+
+namespace Tegra {
+class GPU;
}
namespace Vulkan {
@@ -22,9 +30,7 @@ namespace Vulkan {
class StateTracker;
class VKBlitScreen;
class VKDevice;
-class VKFence;
class VKMemoryManager;
-class VKResourceManager;
class VKSwapchain;
class VKScheduler;
class VKImage;
@@ -38,13 +44,15 @@ struct VKScreenInfo {
class RendererVulkan final : public VideoCore::RendererBase {
public:
- explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system);
+    explicit RendererVulkan(Core::TelemetrySession& telemetry_session,
+ Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory,
+ Tegra::GPU& gpu,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context);
~RendererVulkan() override;
bool Init() override;
void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
- bool TryPresent(int timeout_ms) override;
static std::vector<std::string> EnumerateDevices();
@@ -57,7 +65,9 @@ private:
void Report() const;
- Core::System& system;
+ Core::TelemetrySession& telemetry_session;
+ Core::Memory::Memory& cpu_memory;
+ Tegra::GPU& gpu;
Common::DynamicLibrary library;
vk::InstanceDispatch dld;
@@ -69,11 +79,10 @@ private:
vk::DebugCallback debug_callback;
std::unique_ptr<VKDevice> device;
- std::unique_ptr<VKSwapchain> swapchain;
std::unique_ptr<VKMemoryManager> memory_manager;
- std::unique_ptr<VKResourceManager> resource_manager;
std::unique_ptr<StateTracker> state_tracker;
std::unique_ptr<VKScheduler> scheduler;
+ std::unique_ptr<VKSwapchain> swapchain;
std::unique_ptr<VKBlitScreen> blit_screen;
};
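
Note the member reordering at the end: swapchain now sits below scheduler because VKSwapchain takes a scheduler reference in this change, and C++ destroys members in reverse declaration order, so the swapchain must be declared after the scheduler to be torn down first. A tiny illustration of the rule:

    #include <cstdio>

    struct Dependency {
        ~Dependency() { std::puts("Dependency destroyed"); }
    };
    struct Dependent {
        ~Dependent() { std::puts("Dependent destroyed"); }
    };

    struct Owner {
        Dependency dependency; // destroyed second
        Dependent dependent;   // destroyed first, while 'dependency' is still alive
    };

    int main() {
        Owner owner; // prints "Dependent destroyed", then "Dependency destroyed"
    }
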
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index fbd406f2b..b5b60309e 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -12,11 +12,9 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/math_util.h"
-
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
-
#include "video_core/gpu.h"
#include "video_core/morton.h"
#include "video_core/rasterizer_interface.h"
@@ -24,8 +22,8 @@
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_image.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
@@ -141,24 +139,28 @@ struct ScreenRectVertex {
std::array<f32, 2> tex_coord;
static VkVertexInputBindingDescription GetDescription() {
- VkVertexInputBindingDescription description;
- description.binding = 0;
- description.stride = sizeof(ScreenRectVertex);
- description.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
- return description;
+ return {
+ .binding = 0,
+ .stride = sizeof(ScreenRectVertex),
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
+ };
}
static std::array<VkVertexInputAttributeDescription, 2> GetAttributes() {
- std::array<VkVertexInputAttributeDescription, 2> attributes;
- attributes[0].location = 0;
- attributes[0].binding = 0;
- attributes[0].format = VK_FORMAT_R32G32_SFLOAT;
- attributes[0].offset = offsetof(ScreenRectVertex, position);
- attributes[1].location = 1;
- attributes[1].binding = 0;
- attributes[1].format = VK_FORMAT_R32G32_SFLOAT;
- attributes[1].offset = offsetof(ScreenRectVertex, tex_coord);
- return attributes;
+ return {{
+ {
+ .location = 0,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = offsetof(ScreenRectVertex, position),
+ },
+ {
+ .location = 1,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = offsetof(ScreenRectVertex, tex_coord),
+ },
+ }};
}
};
@@ -183,9 +185,9 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
switch (framebuffer.pixel_format) {
- case Tegra::FramebufferConfig::PixelFormat::ABGR8:
+ case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
- case Tegra::FramebufferConfig::PixelFormat::RGB565:
+ case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
return VK_FORMAT_R5G6B5_UNORM_PACK16;
default:
UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
@@ -206,17 +208,15 @@ struct VKBlitScreen::BufferData {
// Unaligned image data goes here
};
-VKBlitScreen::VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window,
- VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
- VKSwapchain& swapchain, VKScheduler& scheduler,
- const VKScreenInfo& screen_info)
- : system{system}, render_window{render_window}, rasterizer{rasterizer}, device{device},
- resource_manager{resource_manager}, memory_manager{memory_manager}, swapchain{swapchain},
- scheduler{scheduler}, image_count{swapchain.GetImageCount()}, screen_info{screen_info} {
- watches.resize(image_count);
- std::generate(watches.begin(), watches.end(),
- []() { return std::make_unique<VKFenceWatch>(); });
+VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
+ Core::Frontend::EmuWindow& render_window_,
+ VideoCore::RasterizerInterface& rasterizer_, const VKDevice& device_,
+ VKMemoryManager& memory_manager_, VKSwapchain& swapchain_,
+ VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
+ : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_},
+ device{device_}, memory_manager{memory_manager_}, swapchain{swapchain_},
+ scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
+ resource_ticks.resize(image_count);
CreateStaticResources();
CreateDynamicResources();
@@ -228,15 +228,16 @@ void VKBlitScreen::Recreate() {
CreateDynamicResources();
}
-std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
- bool use_accelerated) {
+VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated) {
RefreshResources(framebuffer);
// Finish any pending renderpass
scheduler.RequestOutsideRenderPassOperationContext();
const std::size_t image_index = swapchain.GetImageIndex();
- watches[image_index]->Watch(scheduler.GetFence());
+
+ scheduler.Wait(resource_ticks[image_index]);
+ resource_ticks[image_index] = scheduler.CurrentTick();
VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get();
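
The per-image VKFenceWatch bookkeeping is replaced with a plain tick: Draw() blocks until the scheduler's timeline has passed the value recorded the last time this swapchain image was used, then stamps the current tick so the next reuse waits in turn. A minimal sketch of the pattern; Scheduler here is a stand-in for VKScheduler (presumably backed by the new master timeline semaphore), not its real interface:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Sketch: per-image resources are guarded by a monotonically increasing tick.
    class Scheduler {
    public:
        void Wait(std::uint64_t tick) { /* block until the GPU has reached 'tick' */ }
        std::uint64_t CurrentTick() const { return cpu_tick; }
        void Flush() { ++cpu_tick; /* submit work that signals 'cpu_tick' when done */ }
    private:
        std::uint64_t cpu_tick = 1;
    };

    void BeginImage(Scheduler& scheduler, std::vector<std::uint64_t>& resource_ticks,
                    std::size_t image_index) {
        scheduler.Wait(resource_ticks[image_index]);           // prior use has finished
        resource_ticks[image_index] = scheduler.CurrentTick(); // mark in-flight again
    }
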
@@ -255,7 +256,7 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon
const auto pixel_format =
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
- const auto host_ptr = system.Memory().GetPointer(framebuffer_addr);
+ const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr);
rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer));
// TODO(Rodrigo): Read this from HLE
@@ -267,20 +268,25 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon
blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
- VkBufferImageCopy copy;
- copy.bufferOffset = image_offset;
- copy.bufferRowLength = 0;
- copy.bufferImageHeight = 0;
- copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
- copy.imageSubresource.mipLevel = 0;
- copy.imageSubresource.baseArrayLayer = 0;
- copy.imageSubresource.layerCount = 1;
- copy.imageOffset.x = 0;
- copy.imageOffset.y = 0;
- copy.imageOffset.z = 0;
- copy.imageExtent.width = framebuffer.width;
- copy.imageExtent.height = framebuffer.height;
- copy.imageExtent.depth = 1;
+ const VkBufferImageCopy copy{
+ .bufferOffset = image_offset,
+ .bufferRowLength = 0,
+ .bufferImageHeight = 0,
+ .imageSubresource =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .mipLevel = 0,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ .imageOffset = {.x = 0, .y = 0, .z = 0},
+ .imageExtent =
+ {
+ .width = framebuffer.width,
+ .height = framebuffer.height,
+ .depth = 1,
+ },
+ };
scheduler.Record(
[buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
@@ -295,11 +301,9 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon
descriptor_set = descriptor_sets[image_index], buffer = *buffer,
size = swapchain.GetSize(), pipeline = *pipeline,
layout = *pipeline_layout](vk::CommandBuffer cmdbuf) {
- VkClearValue clear_color;
- clear_color.color.float32[0] = 0.0f;
- clear_color.color.float32[1] = 0.0f;
- clear_color.color.float32[2] = 0.0f;
- clear_color.color.float32[3] = 0.0f;
+ const VkClearValue clear_color{
+ .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}},
+ };
VkRenderPassBeginInfo renderpass_bi;
renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
@@ -336,7 +340,7 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon
cmdbuf.EndRenderPass();
});
- return {scheduler.GetFence(), *semaphores[image_index]};
+ return *semaphores[image_index];
}
void VKBlitScreen::CreateStaticResources() {
@@ -379,93 +383,109 @@ void VKBlitScreen::CreateSemaphores() {
}
void VKBlitScreen::CreateDescriptorPool() {
- std::array<VkDescriptorPoolSize, 2> pool_sizes;
- pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
- pool_sizes[0].descriptorCount = static_cast<u32>(image_count);
- pool_sizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
- pool_sizes[1].descriptorCount = static_cast<u32>(image_count);
-
- VkDescriptorPoolCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
- ci.maxSets = static_cast<u32>(image_count);
- ci.poolSizeCount = static_cast<u32>(pool_sizes.size());
- ci.pPoolSizes = pool_sizes.data();
+ const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
+ {
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .descriptorCount = static_cast<u32>(image_count),
+ },
+ {
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = static_cast<u32>(image_count),
+ },
+ }};
+
+ const VkDescriptorPoolCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
+ .maxSets = static_cast<u32>(image_count),
+ .poolSizeCount = static_cast<u32>(pool_sizes.size()),
+ .pPoolSizes = pool_sizes.data(),
+ };
descriptor_pool = device.GetLogical().CreateDescriptorPool(ci);
}
void VKBlitScreen::CreateRenderPass() {
- VkAttachmentDescription color_attachment;
- color_attachment.flags = 0;
- color_attachment.format = swapchain.GetImageFormat();
- color_attachment.samples = VK_SAMPLE_COUNT_1_BIT;
- color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
- color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
- color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
- color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
- color_attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
- color_attachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
-
- VkAttachmentReference color_attachment_ref;
- color_attachment_ref.attachment = 0;
- color_attachment_ref.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-
- VkSubpassDescription subpass_description;
- subpass_description.flags = 0;
- subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
- subpass_description.inputAttachmentCount = 0;
- subpass_description.pInputAttachments = nullptr;
- subpass_description.colorAttachmentCount = 1;
- subpass_description.pColorAttachments = &color_attachment_ref;
- subpass_description.pResolveAttachments = nullptr;
- subpass_description.pDepthStencilAttachment = nullptr;
- subpass_description.preserveAttachmentCount = 0;
- subpass_description.pPreserveAttachments = nullptr;
-
- VkSubpassDependency dependency;
- dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
- dependency.dstSubpass = 0;
- dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- dependency.srcAccessMask = 0;
- dependency.dstAccessMask =
- VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
- dependency.dependencyFlags = 0;
-
- VkRenderPassCreateInfo renderpass_ci;
- renderpass_ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
- renderpass_ci.pNext = nullptr;
- renderpass_ci.flags = 0;
- renderpass_ci.attachmentCount = 1;
- renderpass_ci.pAttachments = &color_attachment;
- renderpass_ci.subpassCount = 1;
- renderpass_ci.pSubpasses = &subpass_description;
- renderpass_ci.dependencyCount = 1;
- renderpass_ci.pDependencies = &dependency;
+ const VkAttachmentDescription color_attachment{
+ .flags = 0,
+ .format = swapchain.GetImageFormat(),
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
+ };
+
+ const VkAttachmentReference color_attachment_ref{
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ };
+
+ const VkSubpassDescription subpass_description{
+ .flags = 0,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .pInputAttachments = nullptr,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &color_attachment_ref,
+ .pResolveAttachments = nullptr,
+ .pDepthStencilAttachment = nullptr,
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = nullptr,
+ };
+
+ const VkSubpassDependency dependency{
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+ .dependencyFlags = 0,
+ };
+
+ const VkRenderPassCreateInfo renderpass_ci{
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .attachmentCount = 1,
+ .pAttachments = &color_attachment,
+ .subpassCount = 1,
+ .pSubpasses = &subpass_description,
+ .dependencyCount = 1,
+ .pDependencies = &dependency,
+ };
renderpass = device.GetLogical().CreateRenderPass(renderpass_ci);
}
void VKBlitScreen::CreateDescriptorSetLayout() {
- std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings;
- layout_bindings[0].binding = 0;
- layout_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
- layout_bindings[0].descriptorCount = 1;
- layout_bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
- layout_bindings[0].pImmutableSamplers = nullptr;
- layout_bindings[1].binding = 1;
- layout_bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
- layout_bindings[1].descriptorCount = 1;
- layout_bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
- layout_bindings[1].pImmutableSamplers = nullptr;
-
- VkDescriptorSetLayoutCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.bindingCount = static_cast<u32>(layout_bindings.size());
- ci.pBindings = layout_bindings.data();
+ const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ {
+ .binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ }};
+
+ const VkDescriptorSetLayoutCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .bindingCount = static_cast<u32>(layout_bindings.size()),
+ .pBindings = layout_bindings.data(),
+ };
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
}
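
The remaining hunks in this file all make the same change: field-by-field assignment into uninitialized structs becomes C++20 designated initialization. Besides being shorter, the aggregate form value-initializes any omitted member and the compiler rejects out-of-order initializers, eliminating the stale or forgotten-field bugs the old style invited. For instance, in a hypothetical helper:

    #include <vulkan/vulkan.h>

    // Designated initializers must follow declaration order; omitted members
    // (such as .pNext here) are value-initialized instead of left indeterminate.
    VkDescriptorSetAllocateInfo MakeAllocateInfo(VkDescriptorPool pool,
                                                 const VkDescriptorSetLayout& layout) {
        return {
            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
            .descriptorPool = pool,
            .descriptorSetCount = 1,
            .pSetLayouts = &layout,
        };
    }
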
@@ -473,175 +493,192 @@ void VKBlitScreen::CreateDescriptorSetLayout() {
void VKBlitScreen::CreateDescriptorSets() {
const std::vector layouts(image_count, *descriptor_set_layout);
- VkDescriptorSetAllocateInfo ai;
- ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
- ai.pNext = nullptr;
- ai.descriptorPool = *descriptor_pool;
- ai.descriptorSetCount = static_cast<u32>(image_count);
- ai.pSetLayouts = layouts.data();
+ const VkDescriptorSetAllocateInfo ai{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .descriptorPool = *descriptor_pool,
+ .descriptorSetCount = static_cast<u32>(image_count),
+ .pSetLayouts = layouts.data(),
+ };
+
descriptor_sets = descriptor_pool.Allocate(ai);
}
void VKBlitScreen::CreatePipelineLayout() {
- VkPipelineLayoutCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.setLayoutCount = 1;
- ci.pSetLayouts = descriptor_set_layout.address();
- ci.pushConstantRangeCount = 0;
- ci.pPushConstantRanges = nullptr;
+ const VkPipelineLayoutCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .setLayoutCount = 1,
+ .pSetLayouts = descriptor_set_layout.address(),
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = nullptr,
+ };
pipeline_layout = device.GetLogical().CreatePipelineLayout(ci);
}
void VKBlitScreen::CreateGraphicsPipeline() {
- std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages;
- shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
- shader_stages[0].pNext = nullptr;
- shader_stages[0].flags = 0;
- shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
- shader_stages[0].module = *vertex_shader;
- shader_stages[0].pName = "main";
- shader_stages[0].pSpecializationInfo = nullptr;
- shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
- shader_stages[1].pNext = nullptr;
- shader_stages[1].flags = 0;
- shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
- shader_stages[1].module = *fragment_shader;
- shader_stages[1].pName = "main";
- shader_stages[1].pSpecializationInfo = nullptr;
+ const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = *vertex_shader,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = *fragment_shader,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ }};
const auto vertex_binding_description = ScreenRectVertex::GetDescription();
const auto vertex_attrs_description = ScreenRectVertex::GetAttributes();
- VkPipelineVertexInputStateCreateInfo vertex_input_ci;
- vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
- vertex_input_ci.pNext = nullptr;
- vertex_input_ci.flags = 0;
- vertex_input_ci.vertexBindingDescriptionCount = 1;
- vertex_input_ci.pVertexBindingDescriptions = &vertex_binding_description;
- vertex_input_ci.vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()};
- vertex_input_ci.pVertexAttributeDescriptions = vertex_attrs_description.data();
-
- VkPipelineInputAssemblyStateCreateInfo input_assembly_ci;
- input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
- input_assembly_ci.pNext = nullptr;
- input_assembly_ci.flags = 0;
- input_assembly_ci.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
- input_assembly_ci.primitiveRestartEnable = VK_FALSE;
-
- VkPipelineViewportStateCreateInfo viewport_state_ci;
- viewport_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
- viewport_state_ci.pNext = nullptr;
- viewport_state_ci.flags = 0;
- viewport_state_ci.viewportCount = 1;
- viewport_state_ci.pViewports = nullptr;
- viewport_state_ci.scissorCount = 1;
- viewport_state_ci.pScissors = nullptr;
-
- VkPipelineRasterizationStateCreateInfo rasterization_ci;
- rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
- rasterization_ci.pNext = nullptr;
- rasterization_ci.flags = 0;
- rasterization_ci.depthClampEnable = VK_FALSE;
- rasterization_ci.rasterizerDiscardEnable = VK_FALSE;
- rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL;
- rasterization_ci.cullMode = VK_CULL_MODE_NONE;
- rasterization_ci.frontFace = VK_FRONT_FACE_CLOCKWISE;
- rasterization_ci.depthBiasEnable = VK_FALSE;
- rasterization_ci.depthBiasConstantFactor = 0.0f;
- rasterization_ci.depthBiasClamp = 0.0f;
- rasterization_ci.depthBiasSlopeFactor = 0.0f;
- rasterization_ci.lineWidth = 1.0f;
-
- VkPipelineMultisampleStateCreateInfo multisampling_ci;
- multisampling_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
- multisampling_ci.pNext = nullptr;
- multisampling_ci.flags = 0;
- multisampling_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
- multisampling_ci.sampleShadingEnable = VK_FALSE;
- multisampling_ci.minSampleShading = 0.0f;
- multisampling_ci.pSampleMask = nullptr;
- multisampling_ci.alphaToCoverageEnable = VK_FALSE;
- multisampling_ci.alphaToOneEnable = VK_FALSE;
-
- VkPipelineColorBlendAttachmentState color_blend_attachment;
- color_blend_attachment.blendEnable = VK_FALSE;
- color_blend_attachment.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO;
- color_blend_attachment.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO;
- color_blend_attachment.colorBlendOp = VK_BLEND_OP_ADD;
- color_blend_attachment.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
- color_blend_attachment.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
- color_blend_attachment.alphaBlendOp = VK_BLEND_OP_ADD;
- color_blend_attachment.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
-
- VkPipelineColorBlendStateCreateInfo color_blend_ci;
- color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
- color_blend_ci.flags = 0;
- color_blend_ci.pNext = nullptr;
- color_blend_ci.logicOpEnable = VK_FALSE;
- color_blend_ci.logicOp = VK_LOGIC_OP_COPY;
- color_blend_ci.attachmentCount = 1;
- color_blend_ci.pAttachments = &color_blend_attachment;
- color_blend_ci.blendConstants[0] = 0.0f;
- color_blend_ci.blendConstants[1] = 0.0f;
- color_blend_ci.blendConstants[2] = 0.0f;
- color_blend_ci.blendConstants[3] = 0.0f;
-
- static constexpr std::array dynamic_states = {VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR};
- VkPipelineDynamicStateCreateInfo dynamic_state_ci;
- dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
- dynamic_state_ci.pNext = nullptr;
- dynamic_state_ci.flags = 0;
- dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size());
- dynamic_state_ci.pDynamicStates = dynamic_states.data();
-
- VkGraphicsPipelineCreateInfo pipeline_ci;
- pipeline_ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
- pipeline_ci.pNext = nullptr;
- pipeline_ci.flags = 0;
- pipeline_ci.stageCount = static_cast<u32>(shader_stages.size());
- pipeline_ci.pStages = shader_stages.data();
- pipeline_ci.pVertexInputState = &vertex_input_ci;
- pipeline_ci.pInputAssemblyState = &input_assembly_ci;
- pipeline_ci.pTessellationState = nullptr;
- pipeline_ci.pViewportState = &viewport_state_ci;
- pipeline_ci.pRasterizationState = &rasterization_ci;
- pipeline_ci.pMultisampleState = &multisampling_ci;
- pipeline_ci.pDepthStencilState = nullptr;
- pipeline_ci.pColorBlendState = &color_blend_ci;
- pipeline_ci.pDynamicState = &dynamic_state_ci;
- pipeline_ci.layout = *pipeline_layout;
- pipeline_ci.renderPass = *renderpass;
- pipeline_ci.subpass = 0;
- pipeline_ci.basePipelineHandle = 0;
- pipeline_ci.basePipelineIndex = 0;
+ const VkPipelineVertexInputStateCreateInfo vertex_input_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .vertexBindingDescriptionCount = 1,
+ .pVertexBindingDescriptions = &vertex_binding_description,
+ .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attrs_description.size()),

+ .pVertexAttributeDescriptions = vertex_attrs_description.data(),
+ };
+
+ const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = VK_FALSE,
+ };
+
+ const VkPipelineViewportStateCreateInfo viewport_state_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .viewportCount = 1,
+ .pViewports = nullptr,
+ .scissorCount = 1,
+ .pScissors = nullptr,
+ };
+
+ const VkPipelineRasterizationStateCreateInfo rasterization_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .depthClampEnable = VK_FALSE,
+ .rasterizerDiscardEnable = VK_FALSE,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_CLOCKWISE,
+ .depthBiasEnable = VK_FALSE,
+ .depthBiasConstantFactor = 0.0f,
+ .depthBiasClamp = 0.0f,
+ .depthBiasSlopeFactor = 0.0f,
+ .lineWidth = 1.0f,
+ };
+
+ const VkPipelineMultisampleStateCreateInfo multisampling_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+ .sampleShadingEnable = VK_FALSE,
+ .minSampleShading = 0.0f,
+ .pSampleMask = nullptr,
+ .alphaToCoverageEnable = VK_FALSE,
+ .alphaToOneEnable = VK_FALSE,
+ };
+
+ const VkPipelineColorBlendAttachmentState color_blend_attachment{
+ .blendEnable = VK_FALSE,
+ .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .colorBlendOp = VK_BLEND_OP_ADD,
+ .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .alphaBlendOp = VK_BLEND_OP_ADD,
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ };
+
+ const VkPipelineColorBlendStateCreateInfo color_blend_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .logicOpEnable = VK_FALSE,
+ .logicOp = VK_LOGIC_OP_COPY,
+ .attachmentCount = 1,
+ .pAttachments = &color_blend_attachment,
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
+ };
+
+ static constexpr std::array dynamic_states{
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ };
+ const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
+ .pDynamicStates = dynamic_states.data(),
+ };
+
+ const VkGraphicsPipelineCreateInfo pipeline_ci{
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stageCount = static_cast<u32>(shader_stages.size()),
+ .pStages = shader_stages.data(),
+ .pVertexInputState = &vertex_input_ci,
+ .pInputAssemblyState = &input_assembly_ci,
+ .pTessellationState = nullptr,
+ .pViewportState = &viewport_state_ci,
+ .pRasterizationState = &rasterization_ci,
+ .pMultisampleState = &multisampling_ci,
+ .pDepthStencilState = nullptr,
+ .pColorBlendState = &color_blend_ci,
+ .pDynamicState = &dynamic_state_ci,
+ .layout = *pipeline_layout,
+ .renderPass = *renderpass,
+ .subpass = 0,
+ .basePipelineHandle = 0,
+ .basePipelineIndex = 0,
+ };
pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci);
}
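Because pViewports and pScissors are null and both states appear in dynamic_states, the viewport and scissor must be supplied at record time instead. A hedged sketch of that record-time counterpart, assuming the vk::CommandBuffer wrapper exposes SetViewport/SetScissor analogues of vkCmdSetViewport/vkCmdSetScissor and that `size` is the swapchain VkExtent2D:

    const VkViewport viewport{
        .x = 0.0f,
        .y = 0.0f,
        .width = static_cast<float>(size.width),
        .height = static_cast<float>(size.height),
        .minDepth = 0.0f,
        .maxDepth = 1.0f,
    };
    const VkRect2D scissor{
        .offset = {0, 0},
        .extent = size,
    };
    cmdbuf.SetViewport(0, viewport); // assumed wrapper call
    cmdbuf.SetScissor(0, scissor);   // assumed wrapper call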
void VKBlitScreen::CreateSampler() {
- VkSamplerCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.magFilter = VK_FILTER_LINEAR;
- ci.minFilter = VK_FILTER_NEAREST;
- ci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
- ci.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
- ci.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
- ci.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
- ci.mipLodBias = 0.0f;
- ci.anisotropyEnable = VK_FALSE;
- ci.maxAnisotropy = 0.0f;
- ci.compareEnable = VK_FALSE;
- ci.compareOp = VK_COMPARE_OP_NEVER;
- ci.minLod = 0.0f;
- ci.maxLod = 0.0f;
- ci.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
- ci.unnormalizedCoordinates = VK_FALSE;
+ const VkSamplerCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .magFilter = VK_FILTER_LINEAR,
+ .minFilter = VK_FILTER_NEAREST,
+ .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
+ .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ .mipLodBias = 0.0f,
+ .anisotropyEnable = VK_FALSE,
+ .maxAnisotropy = 0.0f,
+ .compareEnable = VK_FALSE,
+ .compareOp = VK_COMPARE_OP_NEVER,
+ .minLod = 0.0f,
+ .maxLod = 0.0f,
+ .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
+ .unnormalizedCoordinates = VK_FALSE,
+ };
sampler = device.GetLogical().CreateSampler(ci);
}
@@ -650,15 +687,17 @@ void VKBlitScreen::CreateFramebuffers() {
const VkExtent2D size{swapchain.GetSize()};
framebuffers.resize(image_count);
- VkFramebufferCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.renderPass = *renderpass;
- ci.attachmentCount = 1;
- ci.width = size.width;
- ci.height = size.height;
- ci.layers = 1;
+ VkFramebufferCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .renderPass = *renderpass,
+ .attachmentCount = 1,
+ .pAttachments = nullptr,
+ .width = size.width,
+ .height = size.height,
+ .layers = 1,
+ };
for (std::size_t i = 0; i < image_count; ++i) {
const VkImageView image_view{swapchain.GetImageViewIndex(i)};
@@ -669,7 +708,7 @@ void VKBlitScreen::CreateFramebuffers() {
void VKBlitScreen::ReleaseRawImages() {
for (std::size_t i = 0; i < raw_images.size(); ++i) {
- watches[i]->Wait();
+ scheduler.Wait(resource_ticks.at(i));
}
raw_images.clear();
raw_buffer_commits.clear();
@@ -678,16 +717,17 @@ void VKBlitScreen::ReleaseRawImages() {
}
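The loop above trades per-resource VKFenceWatch objects for timeline ticks: each image remembers the scheduler tick of its last submission, and release blocks only until the GPU timeline reaches that value. A minimal sketch of the pattern, where CurrentTick() is an assumed scheduler accessor and only the Wait() call is taken from the patch:

    resource_ticks.at(i) = scheduler.CurrentTick(); // at submission time (assumed API)
    scheduler.Wait(resource_ticks.at(i));           // at release time, as in the patch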
void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
- VkBufferCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.size = CalculateBufferSize(framebuffer);
- ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
- VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
- ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ci.queueFamilyIndexCount = 0;
- ci.pQueueFamilyIndices = nullptr;
+ const VkBufferCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = CalculateBufferSize(framebuffer),
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ };
buffer = device.GetLogical().CreateBuffer(ci);
buffer_commit = memory_manager.Commit(buffer, true);
@@ -697,24 +737,28 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
raw_images.resize(image_count);
raw_buffer_commits.resize(image_count);
- VkImageCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.imageType = VK_IMAGE_TYPE_2D;
- ci.format = GetFormat(framebuffer);
- ci.extent.width = framebuffer.width;
- ci.extent.height = framebuffer.height;
- ci.extent.depth = 1;
- ci.mipLevels = 1;
- ci.arrayLayers = 1;
- ci.samples = VK_SAMPLE_COUNT_1_BIT;
- ci.tiling = VK_IMAGE_TILING_LINEAR;
- ci.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
- ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ci.queueFamilyIndexCount = 0;
- ci.pQueueFamilyIndices = nullptr;
- ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+ const VkImageCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = GetFormat(framebuffer),
+ .extent =
+ {
+ .width = framebuffer.width,
+ .height = framebuffer.height,
+ .depth = 1,
+ },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .tiling = VK_IMAGE_TILING_LINEAR,
+ .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ };
for (std::size_t i = 0; i < image_count; ++i) {
raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT);
@@ -723,39 +767,43 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
}
void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
- VkDescriptorBufferInfo buffer_info;
- buffer_info.buffer = *buffer;
- buffer_info.offset = offsetof(BufferData, uniform);
- buffer_info.range = sizeof(BufferData::uniform);
-
- VkWriteDescriptorSet ubo_write;
- ubo_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
- ubo_write.pNext = nullptr;
- ubo_write.dstSet = descriptor_sets[image_index];
- ubo_write.dstBinding = 0;
- ubo_write.dstArrayElement = 0;
- ubo_write.descriptorCount = 1;
- ubo_write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
- ubo_write.pImageInfo = nullptr;
- ubo_write.pBufferInfo = &buffer_info;
- ubo_write.pTexelBufferView = nullptr;
-
- VkDescriptorImageInfo image_info;
- image_info.sampler = *sampler;
- image_info.imageView = image_view;
- image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-
- VkWriteDescriptorSet sampler_write;
- sampler_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
- sampler_write.pNext = nullptr;
- sampler_write.dstSet = descriptor_sets[image_index];
- sampler_write.dstBinding = 1;
- sampler_write.dstArrayElement = 0;
- sampler_write.descriptorCount = 1;
- sampler_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
- sampler_write.pImageInfo = &image_info;
- sampler_write.pBufferInfo = nullptr;
- sampler_write.pTexelBufferView = nullptr;
+ const VkDescriptorBufferInfo buffer_info{
+ .buffer = *buffer,
+ .offset = offsetof(BufferData, uniform),
+ .range = sizeof(BufferData::uniform),
+ };
+
+ const VkWriteDescriptorSet ubo_write{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .pNext = nullptr,
+ .dstSet = descriptor_sets[image_index],
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .pImageInfo = nullptr,
+ .pBufferInfo = &buffer_info,
+ .pTexelBufferView = nullptr,
+ };
+
+ const VkDescriptorImageInfo image_info{
+ .sampler = *sampler,
+ .imageView = image_view,
+ .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ };
+
+ const VkWriteDescriptorSet sampler_write{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .pNext = nullptr,
+ .dstSet = descriptor_sets[image_index],
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = &image_info,
+ .pBufferInfo = nullptr,
+ .pTexelBufferView = nullptr,
+ };
device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {});
}
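Both writes above target state created elsewhere in this file: binding 0 points into the same VkBuffer that also carries vertex data (see the combined TRANSFER/VERTEX/UNIFORM usage flags in CreateStagingBuffer), with offsetof(BufferData, uniform) selecting the uniform slice, while binding 1 pairs the blit sampler with the per-frame image view.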
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 243640fab..8f2839214 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -5,16 +5,18 @@
#pragma once
#include <memory>
-#include <tuple>
#include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Core {
class System;
}
+namespace Core::Memory {
+class Memory;
+}
+
namespace Core::Frontend {
class EmuWindow;
}
@@ -30,26 +32,26 @@ class RasterizerInterface;
namespace Vulkan {
struct ScreenInfo;
+
class RasterizerVulkan;
class VKDevice;
-class VKFence;
class VKImage;
class VKScheduler;
class VKSwapchain;
class VKBlitScreen final {
public:
- explicit VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window,
+ explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
+ Core::Frontend::EmuWindow& render_window,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
- VKSwapchain& swapchain, VKScheduler& scheduler,
- const VKScreenInfo& screen_info);
+ VKMemoryManager& memory_manager, VKSwapchain& swapchain,
+ VKScheduler& scheduler, const VKScreenInfo& screen_info);
~VKBlitScreen();
void Recreate();
- std::tuple<VKFence&, VkSemaphore> Draw(const Tegra::FramebufferConfig& framebuffer,
- bool use_accelerated);
+ [[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
+ bool use_accelerated);
private:
struct BufferData;
@@ -81,11 +83,10 @@ private:
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const;
- Core::System& system;
+ Core::Memory::Memory& cpu_memory;
Core::Frontend::EmuWindow& render_window;
VideoCore::RasterizerInterface& rasterizer;
const VKDevice& device;
- VKResourceManager& resource_manager;
VKMemoryManager& memory_manager;
VKSwapchain& swapchain;
VKScheduler& scheduler;
@@ -106,7 +107,7 @@ private:
vk::Buffer buffer;
VKMemoryCommit buffer_commit;
- std::vector<std::unique_ptr<VKFenceWatch>> watches;
+ std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> semaphores;
std::vector<std::unique_ptr<VKImage>> raw_images;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 5f33d9e40..d9d3da9ea 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -37,112 +37,88 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch
} // Anonymous namespace
-CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
- VAddr cpu_addr, std::size_t size)
- : VideoCommon::BufferBlock{cpu_addr, size} {
- VkBufferCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.size = static_cast<VkDeviceSize>(size);
- ci.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
- ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ci.queueFamilyIndexCount = 0;
- ci.pQueueFamilyIndices = nullptr;
+Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
+ VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size)
+ : BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
+ const VkBufferCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = static_cast<VkDeviceSize>(size),
+ .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ };
buffer.handle = device.GetLogical().CreateBuffer(ci);
buffer.commit = memory_manager.Commit(buffer.handle, false);
}
-CachedBufferBlock::~CachedBufferBlock() = default;
+Buffer::~Buffer() = default;
-VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
- const VKDevice& device, VKMemoryManager& memory_manager,
- VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
- : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
- CreateStreamBuffer(device,
- scheduler)},
- device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
- staging_pool} {}
-
-VKBufferCache::~VKBufferCache() = default;
-
-Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
- return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
-}
-
-VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) {
- return buffer->GetHandle();
-}
-
-VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
- size = std::max(size, std::size_t(4));
- const auto& empty = staging_pool.GetUnusedBuffer(size, false);
- scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
- cmdbuf.FillBuffer(buffer, 0, size, 0);
- });
- return *empty.handle;
-}
-
-void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- const u8* data) {
+void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
std::memcpy(staging.commit->Map(size), data, size);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
- size](vk::CommandBuffer cmdbuf) {
- cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size});
-
- VkBufferMemoryBarrier barrier;
- barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barrier.pNext = nullptr;
- barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
- barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
- barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = buffer;
- barrier.offset = offset;
- barrier.size = size;
+
+ const VkBuffer handle = Handle();
+ scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
+ cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size});
+
+ const VkBufferMemoryBarrier barrier{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = UPLOAD_ACCESS_BARRIERS,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = handle,
+ .offset = offset,
+ .size = size,
+ };
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
barrier, {});
});
}
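For reference, the rewritten Upload above is a three-step staging pattern, all visible in the patch:

    // 1. CPU: memcpy the payload into a freshly acquired host-visible staging buffer.
    // 2. GPU: CopyBuffer(staging -> block) recorded outside any render pass.
    // 3. GPU: a VkBufferMemoryBarrier from VK_ACCESS_TRANSFER_WRITE_BIT to
    //    UPLOAD_ACCESS_BARRIERS, so later shader stages observe the copied data.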
-void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- u8* data) {
+void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
- size](vk::CommandBuffer cmdbuf) {
- VkBufferMemoryBarrier barrier;
- barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barrier.pNext = nullptr;
- barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
- barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
- barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = buffer;
- barrier.offset = offset;
- barrier.size = size;
+
+ const VkBuffer handle = Handle();
+ scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
+ const VkBufferMemoryBarrier barrier{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = handle,
+ .offset = offset,
+ .size = size,
+ };
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
- cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size});
+ cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size});
});
scheduler.Finish();
std::memcpy(data, staging.commit->Map(size), size);
}
-void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
- std::size_t dst_offset, std::size_t size) {
+void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
+ std::size_t size) {
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset,
- dst_offset, size](vk::CommandBuffer cmdbuf) {
+
+ const VkBuffer dst_buffer = Handle();
+ scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
+ size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
std::array<VkBufferMemoryBarrier, 2> barriers;
@@ -169,4 +145,31 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
});
}
+VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
+ const VKDevice& device_, VKMemoryManager& memory_manager_,
+ VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_)
+ : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, gpu_memory, cpu_memory,
+ CreateStreamBuffer(device_,
+ scheduler_)},
+ device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{
+ staging_pool_} {}
+
+VKBufferCache::~VKBufferCache() = default;
+
+std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
+ return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr,
+ size);
+}
+
+VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
+ size = std::max(size, std::size_t(4));
+ const auto& empty = staging_pool.GetUnusedBuffer(size, false);
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
+ cmdbuf.FillBuffer(buffer, 0, size, 0);
+ });
+ return {*empty.handle, 0, 0};
+}
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index a54583e7d..7fb5ceedf 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -8,60 +8,57 @@
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/wrapper.h"
-namespace Core {
-class System;
-}
-
namespace Vulkan {
class VKDevice;
class VKMemoryManager;
class VKScheduler;
-class CachedBufferBlock final : public VideoCommon::BufferBlock {
+class Buffer final : public VideoCommon::BufferBlock {
public:
- explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
- VAddr cpu_addr, std::size_t size);
- ~CachedBufferBlock();
+ explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
+ VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size);
+ ~Buffer();
+
+ void Upload(std::size_t offset, std::size_t size, const u8* data);
+
+ void Download(std::size_t offset, std::size_t size, u8* data);
+
+ void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
+ std::size_t size);
- VkBuffer GetHandle() const {
+ VkBuffer Handle() const {
return *buffer.handle;
}
+ u64 Address() const {
+ return 0;
+ }
+
private:
+ VKScheduler& scheduler;
+ VKStagingBufferPool& staging_pool;
+
VKBuffer buffer;
};
-using Buffer = std::shared_ptr<CachedBufferBlock>;
-
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
public:
- explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+ explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKBufferCache();
- VkBuffer GetEmptyBuffer(std::size_t size) override;
+ BufferInfo GetEmptyBuffer(std::size_t size) override;
protected:
- VkBuffer ToHandle(const Buffer& buffer) override;
-
- Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
-
- void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- const u8* data) override;
-
- void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- u8* data) override;
-
- void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
- std::size_t dst_offset, std::size_t size) override;
+ std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
private:
const VKDevice& device;
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp
new file mode 100644
index 000000000..6339f4fe0
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp
@@ -0,0 +1,46 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstddef>
+
+#include "video_core/renderer_vulkan/vk_command_pool.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Vulkan {
+
+constexpr size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
+
+struct CommandPool::Pool {
+ vk::CommandPool handle;
+ vk::CommandBuffers cmdbufs;
+};
+
+CommandPool::CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device)
+ : ResourcePool(master_semaphore, COMMAND_BUFFER_POOL_SIZE), device{device} {}
+
+CommandPool::~CommandPool() = default;
+
+void CommandPool::Allocate(size_t begin, size_t end) {
+ // Command buffers are going to be committed, recorded, and executed every usage cycle.
+ // They are also reset when committed.
+ Pool& pool = pools.emplace_back();
+ pool.handle = device.GetLogical().CreateCommandPool({
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .pNext = nullptr,
+ .flags =
+ VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = device.GetGraphicsFamily(),
+ });
+ pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE);
+}
+
+VkCommandBuffer CommandPool::Commit() {
+ const size_t index = CommitResource();
+ const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
+ const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
+ return pools[pool_index].cmdbufs[sub_index];
+}
+
+} // namespace Vulkan
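A worked example of the Commit() indexing above, assuming two pools have already been allocated:

    // COMMAND_BUFFER_POOL_SIZE = 0x1000
    // index      = 0x1005 (returned by CommitResource())
    // pool_index = 0x1005 / 0x1000 = 1 -> pools[1]
    // sub_index  = 0x1005 % 0x1000 = 5 -> pools[1].cmdbufs[5]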
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h
new file mode 100644
index 000000000..b9cb3fb5d
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_command_pool.h
@@ -0,0 +1,34 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Vulkan {
+
+class MasterSemaphore;
+class VKDevice;
+
+class CommandPool final : public ResourcePool {
+public:
+ explicit CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device);
+ ~CommandPool() override;
+
+ void Allocate(size_t begin, size_t end) override;
+
+ VkCommandBuffer Commit();
+
+private:
+ struct Pool;
+
+ const VKDevice& device;
+ std::vector<Pool> pools;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index da71e710c..9637c6059 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -112,35 +112,36 @@ constexpr u8 quad_array[] = {
0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00,
- 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
+ 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
+};
VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
- VkDescriptorSetLayoutBinding binding;
- binding.binding = 0;
- binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
- binding.descriptorCount = 1;
- binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
- binding.pImmutableSamplers = nullptr;
- return binding;
+ return {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ };
}
VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
- VkDescriptorUpdateTemplateEntryKHR entry;
- entry.dstBinding = 0;
- entry.dstArrayElement = 0;
- entry.descriptorCount = 1;
- entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
- entry.offset = 0;
- entry.stride = sizeof(DescriptorUpdateEntry);
- return entry;
+ return {
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .offset = 0,
+ .stride = sizeof(DescriptorUpdateEntry),
+ };
}
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
- VkPushConstantRange range;
- range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
- range.offset = 0;
- range.size = static_cast<u32>(size);
- return range;
+ return {
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .offset = 0,
+ .size = static_cast<u32>(size),
+ };
}
// Uint8 SPIR-V module. Generated from the "shaders/" directory.
@@ -218,7 +219,8 @@ constexpr u8 uint8_pass[] = {
0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
- 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
+ 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
+};
// Quad indexed SPIR-V module. Generated from the "shaders/" directory.
constexpr u8 QUAD_INDEXED_SPV[] = {
@@ -341,32 +343,37 @@ constexpr u8 QUAD_INDEXED_SPV[] = {
0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00,
- 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
+ 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
+};
std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
- std::array<VkDescriptorSetLayoutBinding, 2> bindings;
- bindings[0].binding = 0;
- bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
- bindings[0].descriptorCount = 1;
- bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
- bindings[0].pImmutableSamplers = nullptr;
- bindings[1].binding = 1;
- bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
- bindings[1].descriptorCount = 1;
- bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
- bindings[1].pImmutableSamplers = nullptr;
- return bindings;
+ return {{
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ {
+ .binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ }};
}
VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
- VkDescriptorUpdateTemplateEntryKHR entry;
- entry.dstBinding = 0;
- entry.dstArrayElement = 0;
- entry.descriptorCount = 2;
- entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
- entry.offset = 0;
- entry.stride = sizeof(DescriptorUpdateEntry);
- return entry;
+ return {
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 2,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .offset = 0,
+ .stride = sizeof(DescriptorUpdateEntry),
+ };
}
} // Anonymous namespace
@@ -376,37 +383,37 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
vk::Span<VkPushConstantRange> push_constants, std::size_t code_size,
const u8* code) {
- VkDescriptorSetLayoutCreateInfo descriptor_layout_ci;
- descriptor_layout_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
- descriptor_layout_ci.pNext = nullptr;
- descriptor_layout_ci.flags = 0;
- descriptor_layout_ci.bindingCount = bindings.size();
- descriptor_layout_ci.pBindings = bindings.data();
- descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(descriptor_layout_ci);
-
- VkPipelineLayoutCreateInfo pipeline_layout_ci;
- pipeline_layout_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
- pipeline_layout_ci.pNext = nullptr;
- pipeline_layout_ci.flags = 0;
- pipeline_layout_ci.setLayoutCount = 1;
- pipeline_layout_ci.pSetLayouts = descriptor_set_layout.address();
- pipeline_layout_ci.pushConstantRangeCount = push_constants.size();
- pipeline_layout_ci.pPushConstantRanges = push_constants.data();
- layout = device.GetLogical().CreatePipelineLayout(pipeline_layout_ci);
+ descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .bindingCount = bindings.size(),
+ .pBindings = bindings.data(),
+ });
+
+ layout = device.GetLogical().CreatePipelineLayout({
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .setLayoutCount = 1,
+ .pSetLayouts = descriptor_set_layout.address(),
+ .pushConstantRangeCount = push_constants.size(),
+ .pPushConstantRanges = push_constants.data(),
+ });
if (!templates.empty()) {
- VkDescriptorUpdateTemplateCreateInfoKHR template_ci;
- template_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR;
- template_ci.pNext = nullptr;
- template_ci.flags = 0;
- template_ci.descriptorUpdateEntryCount = templates.size();
- template_ci.pDescriptorUpdateEntries = templates.data();
- template_ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
- template_ci.descriptorSetLayout = *descriptor_set_layout;
- template_ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
- template_ci.pipelineLayout = *layout;
- template_ci.set = 0;
- descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR(template_ci);
+ descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
+ .pNext = nullptr,
+ .flags = 0,
+ .descriptorUpdateEntryCount = templates.size(),
+ .pDescriptorUpdateEntries = templates.data(),
+ .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
+ .descriptorSetLayout = *descriptor_set_layout,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .pipelineLayout = *layout,
+ .set = 0,
+ });
descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
}
@@ -414,42 +421,42 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1);
std::memcpy(code_copy.get(), code, code_size);
- VkShaderModuleCreateInfo module_ci;
- module_ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
- module_ci.pNext = nullptr;
- module_ci.flags = 0;
- module_ci.codeSize = code_size;
- module_ci.pCode = code_copy.get();
- module = device.GetLogical().CreateShaderModule(module_ci);
-
- VkComputePipelineCreateInfo pipeline_ci;
- pipeline_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
- pipeline_ci.pNext = nullptr;
- pipeline_ci.flags = 0;
- pipeline_ci.layout = *layout;
- pipeline_ci.basePipelineHandle = nullptr;
- pipeline_ci.basePipelineIndex = 0;
-
- VkPipelineShaderStageCreateInfo& stage_ci = pipeline_ci.stage;
- stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
- stage_ci.pNext = nullptr;
- stage_ci.flags = 0;
- stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT;
- stage_ci.module = *module;
- stage_ci.pName = "main";
- stage_ci.pSpecializationInfo = nullptr;
-
- pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci);
+ module = device.GetLogical().CreateShaderModule({
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .codeSize = code_size,
+ .pCode = code_copy.get(),
+ });
+
+ pipeline = device.GetLogical().CreateComputePipeline({
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage =
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = *module,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ .layout = *layout,
+ .basePipelineHandle = nullptr,
+ .basePipelineIndex = 0,
+ });
}
VKComputePass::~VKComputePass() = default;
-VkDescriptorSet VKComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue,
- VKFence& fence) {
+VkDescriptorSet VKComputePass::CommitDescriptorSet(
+ VKUpdateDescriptorQueue& update_descriptor_queue) {
if (!descriptor_template) {
return nullptr;
}
- const auto set = descriptor_allocator->Commit(fence);
+ const VkDescriptorSet set = descriptor_allocator->Commit();
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
@@ -473,7 +480,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
- const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
+ const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
@@ -516,13 +523,13 @@ Uint8Pass::~Uint8Pass() = default;
std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
u64 src_offset) {
- const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16));
+ const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
- const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
+ const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
@@ -585,7 +592,7 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
- const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
+ const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 230b526bc..acc94f27e 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -15,7 +15,6 @@
namespace Vulkan {
class VKDevice;
-class VKFence;
class VKScheduler;
class VKStagingBufferPool;
class VKUpdateDescriptorQueue;
@@ -30,8 +29,7 @@ public:
~VKComputePass();
protected:
- VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue,
- VKFence& fence);
+ VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue);
vk::DescriptorUpdateTemplateKHR descriptor_template;
vk::PipelineLayout layout;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 8e1b46277..9be72dc9b 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -32,7 +32,7 @@ VkDescriptorSet VKComputePipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
- const auto set = descriptor_allocator.Commit(scheduler.GetFence());
+ const VkDescriptorSet set = descriptor_allocator.Commit();
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
@@ -43,39 +43,41 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) {
// TODO(Rodrigo): Maybe make individual bindings here?
for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
- VkDescriptorSetLayoutBinding& entry = bindings.emplace_back();
- entry.binding = binding++;
- entry.descriptorType = descriptor_type;
- entry.descriptorCount = 1;
- entry.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
- entry.pImmutableSamplers = nullptr;
+ bindings.push_back({
+ .binding = binding++,
+ .descriptorType = descriptor_type,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ });
}
};
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
- add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size());
+ add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
+ add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
- VkDescriptorSetLayoutCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.bindingCount = static_cast<u32>(bindings.size());
- ci.pBindings = bindings.data();
- return device.GetLogical().CreateDescriptorSetLayout(ci);
+ return device.GetLogical().CreateDescriptorSetLayout({
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .bindingCount = static_cast<u32>(bindings.size()),
+ .pBindings = bindings.data(),
+ });
}
vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const {
- VkPipelineLayoutCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.setLayoutCount = 1;
- ci.pSetLayouts = descriptor_set_layout.address();
- ci.pushConstantRangeCount = 0;
- ci.pPushConstantRanges = nullptr;
- return device.GetLogical().CreatePipelineLayout(ci);
+ return device.GetLogical().CreatePipelineLayout({
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .setLayoutCount = 1,
+ .pSetLayouts = descriptor_set_layout.address(),
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = nullptr,
+ });
}
vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const {
@@ -88,59 +90,63 @@ vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplat
return {};
}
- VkDescriptorUpdateTemplateCreateInfoKHR ci;
- ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size());
- ci.pDescriptorUpdateEntries = template_entries.data();
- ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
- ci.descriptorSetLayout = *descriptor_set_layout;
- ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
- ci.pipelineLayout = *layout;
- ci.set = DESCRIPTOR_SET;
- return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci);
+ return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
+ .pNext = nullptr,
+ .flags = 0,
+ .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
+ .pDescriptorUpdateEntries = template_entries.data(),
+ .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
+ .descriptorSetLayout = *descriptor_set_layout,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .pipelineLayout = *layout,
+ .set = DESCRIPTOR_SET,
+ });
}
vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
device.SaveShader(code);
- VkShaderModuleCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.codeSize = code.size() * sizeof(u32);
- ci.pCode = code.data();
- return device.GetLogical().CreateShaderModule(ci);
+ return device.GetLogical().CreateShaderModule({
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .codeSize = code.size() * sizeof(u32),
+ .pCode = code.data(),
+ });
}
vk::Pipeline VKComputePipeline::CreatePipeline() const {
- VkComputePipelineCreateInfo ci;
- VkPipelineShaderStageCreateInfo& stage_ci = ci.stage;
- stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
- stage_ci.pNext = nullptr;
- stage_ci.flags = 0;
- stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT;
- stage_ci.module = *shader_module;
- stage_ci.pName = "main";
- stage_ci.pSpecializationInfo = nullptr;
-
- VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
- subgroup_size_ci.sType =
- VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT;
- subgroup_size_ci.pNext = nullptr;
- subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
+
+ VkComputePipelineCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage =
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = *shader_module,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ .layout = *layout,
+ .basePipelineHandle = nullptr,
+ .basePipelineIndex = 0,
+ };
+
+ const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .requiredSubgroupSize = GuestWarpSize,
+ };
if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) {
- stage_ci.pNext = &subgroup_size_ci;
+ ci.stage.pNext = &subgroup_size_ci;
}
- ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.layout = *layout;
- ci.basePipelineHandle = nullptr;
- ci.basePipelineIndex = 0;
return device.GetLogical().CreateComputePipeline(ci);
}
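One lifetime subtlety in CreatePipeline(): ci.stage.pNext stores a raw pointer to subgroup_size_ci, so the chained struct must stay alive until CreateComputePipeline returns; keeping both as locals of the same scope, as above, satisfies that.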
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index 890fd52cf..f38e089d5 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -7,7 +7,8 @@
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
@@ -15,14 +16,15 @@ namespace Vulkan {
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines.
constexpr std::size_t SETS_GROW_RATE = 0x20;
-DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool,
- VkDescriptorSetLayout layout)
- : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {}
+DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_,
+ VkDescriptorSetLayout layout_)
+ : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE),
+ descriptor_pool{descriptor_pool_}, layout{layout_} {}
DescriptorAllocator::~DescriptorAllocator() = default;
-VkDescriptorSet DescriptorAllocator::Commit(VKFence& fence) {
- const std::size_t index = CommitResource(fence);
+VkDescriptorSet DescriptorAllocator::Commit() {
+ const std::size_t index = CommitResource();
return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
}
@@ -30,8 +32,9 @@ void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin));
}
-VKDescriptorPool::VKDescriptorPool(const VKDevice& device)
- : device{device}, active_pool{AllocateNewPool()} {}
+VKDescriptorPool::VKDescriptorPool(const VKDevice& device_, VKScheduler& scheduler)
+ : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{
+ AllocateNewPool()} {}
VKDescriptorPool::~VKDescriptorPool() = default;
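Context, stated as an assumption since the type is not shown in this diff: MasterSemaphore is expected to wrap the timeline-semaphore counter the scheduler signals once per submission. ResourcePool compares each entry's recorded tick against it to decide when reuse is safe, which is why DescriptorAllocator::Commit() no longer takes an explicit VKFence and why the pool constructor now needs the scheduler.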
@@ -42,27 +45,31 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
- {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}};
-
- VkDescriptorPoolCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
- ci.maxSets = num_sets;
- ci.poolSizeCount = static_cast<u32>(std::size(pool_sizes));
- ci.pPoolSizes = std::data(pool_sizes);
+ {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
+ {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40},
+ };
+
+ const VkDescriptorPoolCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
+ .maxSets = num_sets,
+ .poolSizeCount = static_cast<u32>(std::size(pool_sizes)),
+ .pPoolSizes = std::data(pool_sizes),
+ };
return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci));
}
vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout,
std::size_t count) {
const std::vector layout_copies(count, layout);
- VkDescriptorSetAllocateInfo ai;
- ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
- ai.pNext = nullptr;
- ai.descriptorPool = **active_pool;
- ai.descriptorSetCount = static_cast<u32>(count);
- ai.pSetLayouts = layout_copies.data();
+ VkDescriptorSetAllocateInfo ai{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .descriptorPool = **active_pool,
+ .descriptorSetCount = static_cast<u32>(count),
+ .pSetLayouts = layout_copies.data(),
+ };
vk::DescriptorSets sets = active_pool->Allocate(ai);
if (!sets.IsOutOfPoolMemory()) {
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
index 9efa66bef..544f32a20 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -6,21 +6,24 @@
#include <vector>
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
+class VKDevice;
class VKDescriptorPool;
+class VKScheduler;
-class DescriptorAllocator final : public VKFencedPool {
+class DescriptorAllocator final : public ResourcePool {
public:
explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout);
~DescriptorAllocator() override;
+ DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
DescriptorAllocator(const DescriptorAllocator&) = delete;
- VkDescriptorSet Commit(VKFence& fence);
+ VkDescriptorSet Commit();
protected:
void Allocate(std::size_t begin, std::size_t end) override;
@@ -36,15 +39,19 @@ class VKDescriptorPool final {
friend DescriptorAllocator;
public:
- explicit VKDescriptorPool(const VKDevice& device);
+ explicit VKDescriptorPool(const VKDevice& device, VKScheduler& scheduler);
~VKDescriptorPool();
+ VKDescriptorPool(const VKDescriptorPool&) = delete;
+ VKDescriptorPool& operator=(const VKDescriptorPool&) = delete;
+
private:
vk::DescriptorPool* AllocateNewPool();
vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count);
const VKDevice& device;
+ MasterSemaphore& master_semaphore;
std::vector<vk::DescriptorPool> pools;
vk::DescriptorPool* active_pool;
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 9fd8ac3f6..05e31f1de 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -22,19 +22,27 @@ namespace {
namespace Alternatives {
-constexpr std::array Depth24UnormS8_UINT = {VK_FORMAT_D32_SFLOAT_S8_UINT,
- VK_FORMAT_D16_UNORM_S8_UINT, VkFormat{}};
-constexpr std::array Depth16UnormS8_UINT = {VK_FORMAT_D24_UNORM_S8_UINT,
- VK_FORMAT_D32_SFLOAT_S8_UINT, VkFormat{}};
+constexpr std::array Depth24UnormS8_UINT{
+ VK_FORMAT_D32_SFLOAT_S8_UINT,
+ VK_FORMAT_D16_UNORM_S8_UINT,
+ VkFormat{},
+};
+
+constexpr std::array Depth16UnormS8_UINT{
+ VK_FORMAT_D24_UNORM_S8_UINT,
+ VK_FORMAT_D32_SFLOAT_S8_UINT,
+ VkFormat{},
+};
} // namespace Alternatives
-constexpr std::array REQUIRED_EXTENSIONS = {
+constexpr std::array REQUIRED_EXTENSIONS{
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
+ VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
@@ -77,14 +85,19 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
VK_FORMAT_A8B8G8R8_UINT_PACK32,
VK_FORMAT_A8B8G8R8_SNORM_PACK32,
+ VK_FORMAT_A8B8G8R8_SINT_PACK32,
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
VK_FORMAT_B5G6R5_UNORM_PACK16,
VK_FORMAT_A2B10G10R10_UNORM_PACK32,
+ VK_FORMAT_A2B10G10R10_UINT_PACK32,
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
VK_FORMAT_R32G32B32A32_SFLOAT,
+ VK_FORMAT_R32G32B32A32_SINT,
VK_FORMAT_R32G32B32A32_UINT,
VK_FORMAT_R32G32_SFLOAT,
+ VK_FORMAT_R32G32_SINT,
VK_FORMAT_R32G32_UINT,
+ VK_FORMAT_R16G16B16A16_SINT,
VK_FORMAT_R16G16B16A16_UINT,
VK_FORMAT_R16G16B16A16_SNORM,
VK_FORMAT_R16G16B16A16_UNORM,
@@ -96,8 +109,11 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
VK_FORMAT_R8G8B8A8_SRGB,
VK_FORMAT_R8G8_UNORM,
VK_FORMAT_R8G8_SNORM,
+ VK_FORMAT_R8G8_SINT,
VK_FORMAT_R8G8_UINT,
VK_FORMAT_R8_UNORM,
+ VK_FORMAT_R8_SNORM,
+ VK_FORMAT_R8_SINT,
VK_FORMAT_R8_UINT,
VK_FORMAT_B10G11R11_UFLOAT_PACK32,
VK_FORMAT_R32_SFLOAT,
@@ -117,6 +133,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
VK_FORMAT_BC2_UNORM_BLOCK,
VK_FORMAT_BC3_UNORM_BLOCK,
VK_FORMAT_BC4_UNORM_BLOCK,
+ VK_FORMAT_BC4_SNORM_BLOCK,
VK_FORMAT_BC5_UNORM_BLOCK,
VK_FORMAT_BC5_SNORM_BLOCK,
VK_FORMAT_BC7_UNORM_BLOCK,
@@ -169,97 +186,111 @@ bool VKDevice::Create() {
const auto queue_cis = GetDeviceQueueCreateInfos();
const std::vector extensions = LoadExtensions();
- VkPhysicalDeviceFeatures2 features2;
- features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
- features2.pNext = nullptr;
+ VkPhysicalDeviceFeatures2 features2{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
+ .pNext = nullptr,
+ };
const void* first_next = &features2;
void** next = &features2.pNext;
- auto& features = features2.features;
- features.robustBufferAccess = false;
- features.fullDrawIndexUint32 = false;
- features.imageCubeArray = false;
- features.independentBlend = true;
- features.geometryShader = true;
- features.tessellationShader = true;
- features.sampleRateShading = false;
- features.dualSrcBlend = false;
- features.logicOp = false;
- features.multiDrawIndirect = false;
- features.drawIndirectFirstInstance = false;
- features.depthClamp = true;
- features.depthBiasClamp = true;
- features.fillModeNonSolid = false;
- features.depthBounds = false;
- features.wideLines = false;
- features.largePoints = true;
- features.alphaToOne = false;
- features.multiViewport = true;
- features.samplerAnisotropy = true;
- features.textureCompressionETC2 = false;
- features.textureCompressionASTC_LDR = is_optimal_astc_supported;
- features.textureCompressionBC = false;
- features.occlusionQueryPrecise = true;
- features.pipelineStatisticsQuery = false;
- features.vertexPipelineStoresAndAtomics = true;
- features.fragmentStoresAndAtomics = true;
- features.shaderTessellationAndGeometryPointSize = false;
- features.shaderImageGatherExtended = true;
- features.shaderStorageImageExtendedFormats = false;
- features.shaderStorageImageMultisample = false;
- features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported;
- features.shaderStorageImageWriteWithoutFormat = true;
- features.shaderUniformBufferArrayDynamicIndexing = false;
- features.shaderSampledImageArrayDynamicIndexing = false;
- features.shaderStorageBufferArrayDynamicIndexing = false;
- features.shaderStorageImageArrayDynamicIndexing = false;
- features.shaderClipDistance = false;
- features.shaderCullDistance = false;
- features.shaderFloat64 = false;
- features.shaderInt64 = false;
- features.shaderInt16 = false;
- features.shaderResourceResidency = false;
- features.shaderResourceMinLod = false;
- features.sparseBinding = false;
- features.sparseResidencyBuffer = false;
- features.sparseResidencyImage2D = false;
- features.sparseResidencyImage3D = false;
- features.sparseResidency2Samples = false;
- features.sparseResidency4Samples = false;
- features.sparseResidency8Samples = false;
- features.sparseResidency16Samples = false;
- features.sparseResidencyAliased = false;
- features.variableMultisampleRate = false;
- features.inheritedQueries = false;
-
- VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage;
- bit16_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR;
- bit16_storage.pNext = nullptr;
- bit16_storage.storageBuffer16BitAccess = false;
- bit16_storage.uniformAndStorageBuffer16BitAccess = true;
- bit16_storage.storagePushConstant16 = false;
- bit16_storage.storageInputOutput16 = false;
+ features2.features = {
+ .robustBufferAccess = false,
+ .fullDrawIndexUint32 = false,
+ .imageCubeArray = false,
+ .independentBlend = true,
+ .geometryShader = true,
+ .tessellationShader = true,
+ .sampleRateShading = false,
+ .dualSrcBlend = false,
+ .logicOp = false,
+ .multiDrawIndirect = false,
+ .drawIndirectFirstInstance = false,
+ .depthClamp = true,
+ .depthBiasClamp = true,
+ .fillModeNonSolid = false,
+ .depthBounds = false,
+ .wideLines = false,
+ .largePoints = true,
+ .alphaToOne = false,
+ .multiViewport = true,
+ .samplerAnisotropy = true,
+ .textureCompressionETC2 = false,
+ .textureCompressionASTC_LDR = is_optimal_astc_supported,
+ .textureCompressionBC = false,
+ .occlusionQueryPrecise = true,
+ .pipelineStatisticsQuery = false,
+ .vertexPipelineStoresAndAtomics = true,
+ .fragmentStoresAndAtomics = true,
+ .shaderTessellationAndGeometryPointSize = false,
+ .shaderImageGatherExtended = true,
+ .shaderStorageImageExtendedFormats = false,
+ .shaderStorageImageMultisample = false,
+ .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
+ .shaderStorageImageWriteWithoutFormat = true,
+ .shaderUniformBufferArrayDynamicIndexing = false,
+ .shaderSampledImageArrayDynamicIndexing = false,
+ .shaderStorageBufferArrayDynamicIndexing = false,
+ .shaderStorageImageArrayDynamicIndexing = false,
+ .shaderClipDistance = false,
+ .shaderCullDistance = false,
+ .shaderFloat64 = false,
+ .shaderInt64 = false,
+ .shaderInt16 = false,
+ .shaderResourceResidency = false,
+ .shaderResourceMinLod = false,
+ .sparseBinding = false,
+ .sparseResidencyBuffer = false,
+ .sparseResidencyImage2D = false,
+ .sparseResidencyImage3D = false,
+ .sparseResidency2Samples = false,
+ .sparseResidency4Samples = false,
+ .sparseResidency8Samples = false,
+ .sparseResidency16Samples = false,
+ .sparseResidencyAliased = false,
+ .variableMultisampleRate = false,
+ .inheritedQueries = false,
+ };
+
+ VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
+ .pNext = nullptr,
+ .timelineSemaphore = true,
+ };
+ SetNext(next, timeline_semaphore);
+
+ VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
+ .pNext = nullptr,
+ .storageBuffer16BitAccess = false,
+ .uniformAndStorageBuffer16BitAccess = true,
+ .storagePushConstant16 = false,
+ .storageInputOutput16 = false,
+ };
SetNext(next, bit16_storage);
- VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage;
- bit8_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR;
- bit8_storage.pNext = nullptr;
- bit8_storage.storageBuffer8BitAccess = false;
- bit8_storage.uniformAndStorageBuffer8BitAccess = true;
- bit8_storage.storagePushConstant8 = false;
+ VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR,
+ .pNext = nullptr,
+ .storageBuffer8BitAccess = false,
+ .uniformAndStorageBuffer8BitAccess = true,
+ .storagePushConstant8 = false,
+ };
SetNext(next, bit8_storage);
- VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset;
- host_query_reset.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT;
- host_query_reset.hostQueryReset = true;
+ VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT,
+ .hostQueryReset = true,
+ };
SetNext(next, host_query_reset);
VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
if (is_float16_supported) {
- float16_int8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
- float16_int8.pNext = nullptr;
- float16_int8.shaderFloat16 = true;
- float16_int8.shaderInt8 = false;
+ float16_int8 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR,
+ .pNext = nullptr,
+ .shaderFloat16 = true,
+ .shaderInt8 = false,
+ };
SetNext(next, float16_int8);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");
@@ -271,10 +302,11 @@ bool VKDevice::Create() {
VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
if (khr_uniform_buffer_standard_layout) {
- std430_layout.sType =
- VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR;
- std430_layout.pNext = nullptr;
- std430_layout.uniformBufferStandardLayout = true;
+ std430_layout = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR,
+ .pNext = nullptr,
+ .uniformBufferStandardLayout = true,
+ };
SetNext(next, std430_layout);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs");
@@ -282,9 +314,11 @@ bool VKDevice::Create() {
VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8;
if (ext_index_type_uint8) {
- index_type_uint8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT;
- index_type_uint8.pNext = nullptr;
- index_type_uint8.indexTypeUint8 = true;
+ index_type_uint8 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT,
+ .pNext = nullptr,
+ .indexTypeUint8 = true,
+ };
SetNext(next, index_type_uint8);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
@@ -292,11 +326,12 @@ bool VKDevice::Create() {
VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
if (ext_transform_feedback) {
- transform_feedback.sType =
- VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
- transform_feedback.pNext = nullptr;
- transform_feedback.transformFeedback = true;
- transform_feedback.geometryStreams = true;
+ transform_feedback = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT,
+ .pNext = nullptr,
+ .transformFeedback = true,
+ .geometryStreams = true,
+ };
SetNext(next, transform_feedback);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
@@ -304,15 +339,29 @@ bool VKDevice::Create() {
VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border;
if (ext_custom_border_color) {
- custom_border.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
- custom_border.pNext = nullptr;
- custom_border.customBorderColors = VK_TRUE;
- custom_border.customBorderColorWithoutFormat = VK_TRUE;
+ custom_border = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT,
+ .pNext = nullptr,
+ .customBorderColors = VK_TRUE,
+ .customBorderColorWithoutFormat = VK_TRUE,
+ };
SetNext(next, custom_border);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors");
}
+ VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
+ if (ext_extended_dynamic_state) {
+ dynamic_state = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT,
+ .pNext = nullptr,
+ .extendedDynamicState = VK_TRUE,
+ };
+ SetNext(next, dynamic_state);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
+ }
+
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
@@ -321,11 +370,13 @@ bool VKDevice::Create() {
if (nv_device_diagnostics_config) {
nsight_aftermath_tracker.Initialize();
- diagnostics_nv.sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV;
- diagnostics_nv.pNext = &features2;
- diagnostics_nv.flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV |
- VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV |
- VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV;
+ diagnostics_nv = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV,
+ .pNext = &features2,
+ .flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV |
+ VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV |
+ VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV,
+ };
first_next = &diagnostics_nv;
}
@@ -337,8 +388,18 @@ bool VKDevice::Create() {
CollectTelemetryParameters();
+ if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR) {
+ // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but the <stride> field
+ // seems to be bugged. Blacklisting it for now.
+ LOG_WARNING(Render_Vulkan,
+ "Blacklisting AMD proprietary from VK_EXT_extended_dynamic_state");
+ ext_extended_dynamic_state = false;
+ }
+
graphics_queue = logical.GetQueue(graphics_family);
present_queue = logical.GetQueue(present_family);
+
+ use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
return true;
}
@@ -541,6 +602,7 @@ std::vector<const char*> VKDevice::LoadExtensions() {
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
bool has_ext_custom_border_color{};
+ bool has_ext_extended_dynamic_state{};
for (const auto& extension : physical.EnumerateDeviceExtensionProperties()) {
Test(extension, nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
Test(extension, khr_uniform_buffer_standard_layout,
@@ -558,6 +620,8 @@ std::vector<const char*> VKDevice::LoadExtensions() {
false);
Test(extension, has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME,
false);
+ Test(extension, has_ext_extended_dynamic_state,
+ VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
if (Settings::values.renderer_debug) {
Test(extension, nv_device_diagnostics_config,
VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true);
@@ -643,6 +707,19 @@ std::vector<const char*> VKDevice::LoadExtensions() {
}
}
+ if (has_ext_extended_dynamic_state) {
+ VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
+ dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
+ dynamic_state.pNext = nullptr;
+ features.pNext = &dynamic_state;
+ physical.GetFeatures2KHR(features);
+
+ if (dynamic_state.extendedDynamicState) {
+ extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
+ ext_extended_dynamic_state = true;
+ }
+ }
+
return extensions;
}
@@ -678,13 +755,15 @@ void VKDevice::SetupFeatures() {
}
void VKDevice::CollectTelemetryParameters() {
- VkPhysicalDeviceDriverPropertiesKHR driver;
- driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR;
- driver.pNext = nullptr;
+ VkPhysicalDeviceDriverPropertiesKHR driver{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR,
+ .pNext = nullptr,
+ };
- VkPhysicalDeviceProperties2KHR properties;
- properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
- properties.pNext = &driver;
+ VkPhysicalDeviceProperties2KHR properties{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
+ .pNext = &driver,
+ };
physical.GetProperties2KHR(properties);
driver_id = driver.driverID;
@@ -693,23 +772,26 @@ void VKDevice::CollectTelemetryParameters() {
const std::vector extensions = physical.EnumerateDeviceExtensionProperties();
reported_extensions.reserve(std::size(extensions));
for (const auto& extension : extensions) {
- reported_extensions.push_back(extension.extensionName);
+ reported_extensions.emplace_back(extension.extensionName);
}
}
std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
static constexpr float QUEUE_PRIORITY = 1.0f;
- std::unordered_set<u32> unique_queue_families = {graphics_family, present_family};
+ std::unordered_set<u32> unique_queue_families{graphics_family, present_family};
std::vector<VkDeviceQueueCreateInfo> queue_cis;
+ queue_cis.reserve(unique_queue_families.size());
for (const u32 queue_family : unique_queue_families) {
- VkDeviceQueueCreateInfo& ci = queue_cis.emplace_back();
- ci.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.queueFamilyIndex = queue_family;
- ci.queueCount = 1;
+ auto& ci = queue_cis.emplace_back(VkDeviceQueueCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .queueFamilyIndex = queue_family,
+ .queueCount = 1,
+ .pQueuePriorities = nullptr,
+ });
ci.pQueuePriorities = &QUEUE_PRIORITY;
}
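Each feature struct above is appended to the pNext chain rooted at features2 through SetNext, with `next` always pointing at the current tail. The helper itself is outside this diff; a minimal sketch of how such a helper can look, assuming the two-argument shape used at the call sites:

    template <typename T>
    void SetNext(void**& next, T& data) {
        *next = &data;      // Link the new struct into the chain tail.
        next = &data.pNext; // The tail is now this struct's own pNext.
    }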
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 6b9227b09..26a233db1 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -122,6 +122,11 @@ public:
return properties.limits.maxPushConstantsSize;
}
+ /// Returns the maximum size for shared memory.
+ u32 GetMaxComputeSharedMemorySize() const {
+ return properties.limits.maxComputeSharedMemorySize;
+ }
+
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return is_optimal_astc_supported;
@@ -182,6 +187,11 @@ public:
return ext_custom_border_color;
}
+ /// Returns true if the device supports VK_EXT_extended_dynamic_state.
+ bool IsExtExtendedDynamicStateSupported() const {
+ return ext_extended_dynamic_state;
+ }
+
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return vendor_name;
@@ -192,6 +202,11 @@ public:
return reported_extensions;
}
+ /// Returns true if the setting for async shader compilation is enabled.
+ bool UseAsynchronousShaders() const {
+ return use_asynchronous_shaders;
+ }
+
/// Checks if the physical device is suitable.
static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface);
@@ -239,8 +254,12 @@ private:
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
+ bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
+ // Asynchronous Graphics Pipeline setting
+ bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
+
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index a02be5487..5babbdd0b 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -30,7 +30,7 @@ void InnerFence::Queue() {
ASSERT(!event);
event = device.GetLogical().CreateEvent();
- ticks = scheduler.Ticks();
+ ticks = scheduler.CurrentTick();
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
@@ -52,7 +52,7 @@ void InnerFence::Wait() {
}
ASSERT(event);
- if (ticks >= scheduler.Ticks()) {
+ if (ticks >= scheduler.CurrentTick()) {
scheduler.Flush();
}
while (!IsEventSignalled()) {
@@ -71,12 +71,12 @@ bool InnerFence::IsEventSignalled() const {
}
}
-VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
- VKQueryCache& query_cache)
- : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache),
- device{device}, scheduler{scheduler} {}
+VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache,
+ VKBufferCache& buffer_cache, VKQueryCache& query_cache,
+ const VKDevice& device_, VKScheduler& scheduler_)
+ : GenericFenceManager(rasterizer, gpu, texture_cache, buffer_cache, query_cache),
+ device{device_}, scheduler{scheduler_} {}
Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
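The switch from Ticks() to CurrentTick() keeps the same flush heuristic: a fence queued at tick T lives in the batch being recorded at that tick, and the batch is only submitted when the scheduler flushes past T. Illustrative only, using the calls shown in this hunk:

    // Queue(): remember the tick of the batch the event was recorded into.
    const u64 queued_tick = scheduler.CurrentTick();
    // Wait(): if the scheduler has not advanced past that tick, the batch was
    // never submitted, so force a flush before polling the event.
    if (queued_tick >= scheduler.CurrentTick()) {
        scheduler.Flush();
    }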
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 043fe7947..1547d6d30 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -55,10 +55,10 @@ using GenericFenceManager =
class VKFenceManager final : public GenericFenceManager {
public:
- explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
- VKQueryCache& query_cache);
+ explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache,
+ VKBufferCache& buffer_cache, VKQueryCache& query_cache,
+ const VKDevice& device, VKScheduler& scheduler);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 69b6bba00..a4b9e7ef5 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -28,15 +28,15 @@ namespace {
template <class StencilFace>
VkStencilOpState GetStencilFaceState(const StencilFace& face) {
- VkStencilOpState state;
- state.failOp = MaxwellToVK::StencilOp(face.ActionStencilFail());
- state.passOp = MaxwellToVK::StencilOp(face.ActionDepthPass());
- state.depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail());
- state.compareOp = MaxwellToVK::ComparisonOp(face.TestFunc());
- state.compareMask = 0;
- state.writeMask = 0;
- state.reference = 0;
- return state;
+ return {
+ .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()),
+ .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()),
+ .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()),
+ .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()),
+ .compareMask = 0,
+ .writeMask = 0,
+ .reference = 0,
+ };
}
bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
@@ -52,20 +52,21 @@ bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
}
VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
- union {
+ union Swizzle {
u32 raw;
BitField<0, 3, Maxwell::ViewportSwizzle> x;
BitField<4, 3, Maxwell::ViewportSwizzle> y;
BitField<8, 3, Maxwell::ViewportSwizzle> z;
BitField<12, 3, Maxwell::ViewportSwizzle> w;
- } const unpacked{swizzle};
-
- VkViewportSwizzleNV result;
- result.x = MaxwellToVK::ViewportSwizzle(unpacked.x);
- result.y = MaxwellToVK::ViewportSwizzle(unpacked.y);
- result.z = MaxwellToVK::ViewportSwizzle(unpacked.z);
- result.w = MaxwellToVK::ViewportSwizzle(unpacked.w);
- return result;
+ };
+ const Swizzle unpacked{swizzle};
+
+ return {
+ .x = MaxwellToVK::ViewportSwizzle(unpacked.x),
+ .y = MaxwellToVK::ViewportSwizzle(unpacked.y),
+ .z = MaxwellToVK::ViewportSwizzle(unpacked.z),
+ .w = MaxwellToVK::ViewportSwizzle(unpacked.w),
+ };
}
} // Anonymous namespace
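Naming the union lets the const-qualified instance sit on its own line. For reference, a hypothetical call showing how the 3-bit fields at bit offsets 0, 4, 8, and 12 decode:

    // 0x3210 unpacks to x=0, y=1, z=2, w=3 before the MaxwellToVK translation.
    const VkViewportSwizzleNV example = UnpackViewportSwizzle(0x3210);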
@@ -77,15 +78,14 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche
const GraphicsPipelineCacheKey& key,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
const SPIRVProgram& program)
- : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
+ : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()},
descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
descriptor_allocator{descriptor_pool, *descriptor_set_layout},
update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
program)},
- renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
- key.renderpass_params,
- program)} {}
+ renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)},
+ pipeline{CreatePipeline(cache_key.renderpass_params, program)} {}
VKGraphicsPipeline::~VKGraphicsPipeline() = default;
@@ -93,31 +93,33 @@ VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
- const auto set = descriptor_allocator.Commit(scheduler.GetFence());
+ const VkDescriptorSet set = descriptor_allocator.Commit();
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
vk::Span<VkDescriptorSetLayoutBinding> bindings) const {
- VkDescriptorSetLayoutCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.bindingCount = bindings.size();
- ci.pBindings = bindings.data();
+ const VkDescriptorSetLayoutCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .bindingCount = bindings.size(),
+ .pBindings = bindings.data(),
+ };
return device.GetLogical().CreateDescriptorSetLayout(ci);
}
vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
- VkPipelineLayoutCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.setLayoutCount = 1;
- ci.pSetLayouts = descriptor_set_layout.address();
- ci.pushConstantRangeCount = 0;
- ci.pPushConstantRanges = nullptr;
+ const VkPipelineLayoutCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .setLayoutCount = 1,
+ .pSetLayouts = descriptor_set_layout.address(),
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = nullptr,
+ };
return device.GetLogical().CreatePipelineLayout(ci);
}
@@ -136,26 +138,28 @@ vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTempla
return {};
}
- VkDescriptorUpdateTemplateCreateInfoKHR ci;
- ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size());
- ci.pDescriptorUpdateEntries = template_entries.data();
- ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
- ci.descriptorSetLayout = *descriptor_set_layout;
- ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
- ci.pipelineLayout = *layout;
- ci.set = DESCRIPTOR_SET;
+ const VkDescriptorUpdateTemplateCreateInfoKHR ci{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
+ .pNext = nullptr,
+ .flags = 0,
+ .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
+ .pDescriptorUpdateEntries = template_entries.data(),
+ .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
+ .descriptorSetLayout = *descriptor_set_layout,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .pipelineLayout = *layout,
+ .set = DESCRIPTOR_SET,
+ };
return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci);
}
std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
const SPIRVProgram& program) const {
- VkShaderModuleCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
+ VkShaderModuleCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ };
std::vector<vk::ShaderModule> modules;
modules.reserve(Maxwell::MaxShaderStage);
@@ -176,38 +180,52 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
const SPIRVProgram& program) const {
- const auto& vi = fixed_state.vertex_input;
- const auto& ds = fixed_state.depth_stencil;
- const auto& cd = fixed_state.color_blending;
- const auto& rs = fixed_state.rasterizer;
- const auto& viewport_swizzles = fixed_state.viewport_swizzles.swizzles;
+ const auto& state = cache_key.fixed_state;
+ const auto& viewport_swizzles = state.viewport_swizzles;
+
+ FixedPipelineState::DynamicState dynamic;
+ if (device.IsExtExtendedDynamicStateSupported()) {
+        // Insert dummy values; as long as they are valid they don't matter, since this fixed
+        // state is ignored when extended dynamic state is enabled
+ dynamic.raw1 = 0;
+ dynamic.raw2 = 0;
+ for (FixedPipelineState::VertexBinding& binding : dynamic.vertex_bindings) {
+ // Enable all vertex bindings
+ binding.raw = 0;
+ binding.enabled.Assign(1);
+ }
+ } else {
+ dynamic = state.dynamic_state;
+ }
std::vector<VkVertexInputBindingDescription> vertex_bindings;
std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
- for (std::size_t index = 0; index < std::size(vi.bindings); ++index) {
- const auto& binding = vi.bindings[index];
+ for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ const auto& binding = dynamic.vertex_bindings[index];
if (!binding.enabled) {
continue;
}
- const bool instanced = vi.binding_divisors[index] != 0;
+ const bool instanced = state.binding_divisors[index] != 0;
const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
- auto& vertex_binding = vertex_bindings.emplace_back();
- vertex_binding.binding = static_cast<u32>(index);
- vertex_binding.stride = binding.stride;
- vertex_binding.inputRate = rate;
+ vertex_bindings.push_back({
+ .binding = static_cast<u32>(index),
+ .stride = binding.stride,
+ .inputRate = rate,
+ });
if (instanced) {
- auto& binding_divisor = vertex_binding_divisors.emplace_back();
- binding_divisor.binding = static_cast<u32>(index);
- binding_divisor.divisor = vi.binding_divisors[index];
+ vertex_binding_divisors.push_back({
+ .binding = static_cast<u32>(index),
+ .divisor = state.binding_divisors[index],
+ });
}
}
std::vector<VkVertexInputAttributeDescription> vertex_attributes;
const auto& input_attributes = program[0]->entries.attributes;
- for (std::size_t index = 0; index < std::size(vi.attributes); ++index) {
- const auto& attribute = vi.attributes[index];
+ for (std::size_t index = 0; index < state.attributes.size(); ++index) {
+ const auto& attribute = state.attributes[index];
if (!attribute.enabled) {
continue;
}
@@ -215,116 +233,133 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
// Skip attributes not used by the vertex shaders.
continue;
}
- auto& vertex_attribute = vertex_attributes.emplace_back();
- vertex_attribute.location = static_cast<u32>(index);
- vertex_attribute.binding = attribute.buffer;
- vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size());
- vertex_attribute.offset = attribute.offset;
+ vertex_attributes.push_back({
+ .location = static_cast<u32>(index),
+ .binding = attribute.buffer,
+ .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
+ .offset = attribute.offset,
+ });
}
- VkPipelineVertexInputStateCreateInfo vertex_input_ci;
- vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
- vertex_input_ci.pNext = nullptr;
- vertex_input_ci.flags = 0;
- vertex_input_ci.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size());
- vertex_input_ci.pVertexBindingDescriptions = vertex_bindings.data();
- vertex_input_ci.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size());
- vertex_input_ci.pVertexAttributeDescriptions = vertex_attributes.data();
-
- VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci;
- input_divisor_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT;
- input_divisor_ci.pNext = nullptr;
- input_divisor_ci.vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size());
- input_divisor_ci.pVertexBindingDivisors = vertex_binding_divisors.data();
+ VkPipelineVertexInputStateCreateInfo vertex_input_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
+ .pVertexBindingDescriptions = vertex_bindings.data(),
+ .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
+ .pVertexAttributeDescriptions = vertex_attributes.data(),
+ };
+
+ const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()),
+ .pVertexBindingDivisors = vertex_binding_divisors.data(),
+ };
if (!vertex_binding_divisors.empty()) {
vertex_input_ci.pNext = &input_divisor_ci;
}
- VkPipelineInputAssemblyStateCreateInfo input_assembly_ci;
- input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
- input_assembly_ci.pNext = nullptr;
- input_assembly_ci.flags = 0;
- input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, rs.Topology());
- input_assembly_ci.primitiveRestartEnable =
- rs.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology);
-
- VkPipelineTessellationStateCreateInfo tessellation_ci;
- tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
- tessellation_ci.pNext = nullptr;
- tessellation_ci.flags = 0;
- tessellation_ci.patchControlPoints = rs.patch_control_points_minus_one.Value() + 1;
-
- VkPipelineViewportStateCreateInfo viewport_ci;
- viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
- viewport_ci.pNext = nullptr;
- viewport_ci.flags = 0;
- viewport_ci.viewportCount = Maxwell::NumViewports;
- viewport_ci.pViewports = nullptr;
- viewport_ci.scissorCount = Maxwell::NumViewports;
- viewport_ci.pScissors = nullptr;
+ const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology());
+ const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+        .topology = input_assembly_topology,
+ .primitiveRestartEnable = state.primitive_restart_enable != 0 &&
+ SupportsPrimitiveRestart(input_assembly_topology),
+ };
+
+ const VkPipelineTessellationStateCreateInfo tessellation_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .patchControlPoints = state.patch_control_points_minus_one.Value() + 1,
+ };
+
+ VkPipelineViewportStateCreateInfo viewport_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .viewportCount = Maxwell::NumViewports,
+ .pViewports = nullptr,
+ .scissorCount = Maxwell::NumViewports,
+ .pScissors = nullptr,
+ };
std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(),
UnpackViewportSwizzle);
- VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci;
- swizzle_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV;
- swizzle_ci.pNext = nullptr;
- swizzle_ci.flags = 0;
- swizzle_ci.viewportCount = Maxwell::NumViewports;
- swizzle_ci.pViewportSwizzles = swizzles.data();
+ VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
+ .pNext = nullptr,
+ .flags = 0,
+ .viewportCount = Maxwell::NumViewports,
+ .pViewportSwizzles = swizzles.data(),
+ };
if (device.IsNvViewportSwizzleSupported()) {
viewport_ci.pNext = &swizzle_ci;
}
- VkPipelineRasterizationStateCreateInfo rasterization_ci;
- rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
- rasterization_ci.pNext = nullptr;
- rasterization_ci.flags = 0;
- rasterization_ci.depthClampEnable = rs.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE;
- rasterization_ci.rasterizerDiscardEnable = rs.rasterize_enable == 0 ? VK_TRUE : VK_FALSE;
- rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL;
- rasterization_ci.cullMode =
- rs.cull_enable ? MaxwellToVK::CullFace(rs.CullFace()) : VK_CULL_MODE_NONE;
- rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.FrontFace());
- rasterization_ci.depthBiasEnable = rs.depth_bias_enable;
- rasterization_ci.depthBiasConstantFactor = 0.0f;
- rasterization_ci.depthBiasClamp = 0.0f;
- rasterization_ci.depthBiasSlopeFactor = 0.0f;
- rasterization_ci.lineWidth = 1.0f;
-
- VkPipelineMultisampleStateCreateInfo multisample_ci;
- multisample_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
- multisample_ci.pNext = nullptr;
- multisample_ci.flags = 0;
- multisample_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
- multisample_ci.sampleShadingEnable = VK_FALSE;
- multisample_ci.minSampleShading = 0.0f;
- multisample_ci.pSampleMask = nullptr;
- multisample_ci.alphaToCoverageEnable = VK_FALSE;
- multisample_ci.alphaToOneEnable = VK_FALSE;
-
- VkPipelineDepthStencilStateCreateInfo depth_stencil_ci;
- depth_stencil_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
- depth_stencil_ci.pNext = nullptr;
- depth_stencil_ci.flags = 0;
- depth_stencil_ci.depthTestEnable = ds.depth_test_enable;
- depth_stencil_ci.depthWriteEnable = ds.depth_write_enable;
- depth_stencil_ci.depthCompareOp =
- ds.depth_test_enable ? MaxwellToVK::ComparisonOp(ds.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS;
- depth_stencil_ci.depthBoundsTestEnable = ds.depth_bounds_enable;
- depth_stencil_ci.stencilTestEnable = ds.stencil_enable;
- depth_stencil_ci.front = GetStencilFaceState(ds.front);
- depth_stencil_ci.back = GetStencilFaceState(ds.back);
- depth_stencil_ci.minDepthBounds = 0.0f;
- depth_stencil_ci.maxDepthBounds = 0.0f;
+ const VkPipelineRasterizationStateCreateInfo rasterization_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .depthClampEnable =
+ static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
+ .rasterizerDiscardEnable =
+ static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode =
+ dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE,
+ .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
+ .depthBiasEnable = state.depth_bias_enable,
+ .depthBiasConstantFactor = 0.0f,
+ .depthBiasClamp = 0.0f,
+ .depthBiasSlopeFactor = 0.0f,
+ .lineWidth = 1.0f,
+ };
+
+ const VkPipelineMultisampleStateCreateInfo multisample_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+ .sampleShadingEnable = VK_FALSE,
+ .minSampleShading = 0.0f,
+ .pSampleMask = nullptr,
+ .alphaToCoverageEnable = VK_FALSE,
+ .alphaToOneEnable = VK_FALSE,
+ };
+
+ const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .depthTestEnable = dynamic.depth_test_enable,
+ .depthWriteEnable = dynamic.depth_write_enable,
+ .depthCompareOp = dynamic.depth_test_enable
+ ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc())
+ : VK_COMPARE_OP_ALWAYS,
+ .depthBoundsTestEnable = dynamic.depth_bounds_enable,
+ .stencilTestEnable = dynamic.stencil_enable,
+ .front = GetStencilFaceState(dynamic.front),
+ .back = GetStencilFaceState(dynamic.back),
+ .minDepthBounds = 0.0f,
+ .maxDepthBounds = 0.0f,
+ };
std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments);
for (std::size_t index = 0; index < num_attachments; ++index) {
- static constexpr std::array COMPONENT_TABLE = {
- VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT,
- VK_COLOR_COMPONENT_A_BIT};
- const auto& blend = cd.attachments[index];
+ static constexpr std::array COMPONENT_TABLE{
+ VK_COLOR_COMPONENT_R_BIT,
+ VK_COLOR_COMPONENT_G_BIT,
+ VK_COLOR_COMPONENT_B_BIT,
+ VK_COLOR_COMPONENT_A_BIT,
+ };
+ const auto& blend = state.attachments[index];
VkColorComponentFlags color_components = 0;
for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) {
@@ -333,45 +368,63 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
}
}
- VkPipelineColorBlendAttachmentState& attachment = cb_attachments[index];
- attachment.blendEnable = blend.enable != 0;
- attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor());
- attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor());
- attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB());
- attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor());
- attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor());
- attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha());
- attachment.colorWriteMask = color_components;
+ cb_attachments[index] = {
+ .blendEnable = blend.enable != 0,
+ .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
+ .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
+ .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()),
+ .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
+ .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
+ .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
+ .colorWriteMask = color_components,
+ };
}
- VkPipelineColorBlendStateCreateInfo color_blend_ci;
- color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
- color_blend_ci.pNext = nullptr;
- color_blend_ci.flags = 0;
- color_blend_ci.logicOpEnable = VK_FALSE;
- color_blend_ci.logicOp = VK_LOGIC_OP_COPY;
- color_blend_ci.attachmentCount = static_cast<u32>(num_attachments);
- color_blend_ci.pAttachments = cb_attachments.data();
- std::memset(color_blend_ci.blendConstants, 0, sizeof(color_blend_ci.blendConstants));
-
- static constexpr std::array dynamic_states = {
+ const VkPipelineColorBlendStateCreateInfo color_blend_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .logicOpEnable = VK_FALSE,
+ .logicOp = VK_LOGIC_OP_COPY,
+ .attachmentCount = static_cast<u32>(num_attachments),
+ .pAttachments = cb_attachments.data(),
+ };
+
+ std::vector dynamic_states{
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
- VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE};
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ };
+ if (device.IsExtExtendedDynamicStateSupported()) {
+ static constexpr std::array extended{
+ VK_DYNAMIC_STATE_CULL_MODE_EXT,
+ VK_DYNAMIC_STATE_FRONT_FACE_EXT,
+ VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT,
+ VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT,
+ VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
+ VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
+ VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT,
+ VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
+ VK_DYNAMIC_STATE_STENCIL_OP_EXT,
+ };
+ dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
+ }
- VkPipelineDynamicStateCreateInfo dynamic_state_ci;
- dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
- dynamic_state_ci.pNext = nullptr;
- dynamic_state_ci.flags = 0;
- dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size());
- dynamic_state_ci.pDynamicStates = dynamic_states.data();
+ const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
+ .pDynamicStates = dynamic_states.data(),
+ };
- VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
- subgroup_size_ci.sType =
- VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT;
- subgroup_size_ci.pNext = nullptr;
- subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
+ const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .requiredSubgroupSize = GuestWarpSize,
+ };
std::vector<VkPipelineShaderStageCreateInfo> shader_stages;
std::size_t module_index = 0;
@@ -379,6 +432,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
if (!program[stage]) {
continue;
}
+
VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back();
stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stage_ci.pNext = nullptr;
@@ -393,26 +447,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
}
}
- VkGraphicsPipelineCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.stageCount = static_cast<u32>(shader_stages.size());
- ci.pStages = shader_stages.data();
- ci.pVertexInputState = &vertex_input_ci;
- ci.pInputAssemblyState = &input_assembly_ci;
- ci.pTessellationState = &tessellation_ci;
- ci.pViewportState = &viewport_ci;
- ci.pRasterizationState = &rasterization_ci;
- ci.pMultisampleState = &multisample_ci;
- ci.pDepthStencilState = &depth_stencil_ci;
- ci.pColorBlendState = &color_blend_ci;
- ci.pDynamicState = &dynamic_state_ci;
- ci.layout = *layout;
- ci.renderPass = renderpass;
- ci.subpass = 0;
- ci.basePipelineHandle = nullptr;
- ci.basePipelineIndex = 0;
+ const VkGraphicsPipelineCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stageCount = static_cast<u32>(shader_stages.size()),
+ .pStages = shader_stages.data(),
+ .pVertexInputState = &vertex_input_ci,
+ .pInputAssemblyState = &input_assembly_ci,
+ .pTessellationState = &tessellation_ci,
+ .pViewportState = &viewport_ci,
+ .pRasterizationState = &rasterization_ci,
+ .pMultisampleState = &multisample_ci,
+ .pDepthStencilState = &depth_stencil_ci,
+ .pColorBlendState = &color_blend_ci,
+ .pDynamicState = &dynamic_state_ci,
+ .layout = *layout,
+ .renderPass = renderpass,
+ .subpass = 0,
+ .basePipelineHandle = nullptr,
+ .basePipelineIndex = 0,
+ };
return device.GetLogical().CreateGraphicsPipeline(ci);
}
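With VK_EXT_extended_dynamic_state active, topology, culling, front face, depth/stencil toggles, and vertex strides leave the baked pipeline and are set per command buffer, which is why the fixed-state fields receive dummy values above and the extra VK_DYNAMIC_STATE_*_EXT entries are appended. A sketch of what the draw path can then issue; `cmdbuf` and the chosen values are placeholders, while the entry points are the standard ones added by the extension:

    vkCmdSetPrimitiveTopologyEXT(cmdbuf, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
    vkCmdSetCullModeEXT(cmdbuf, VK_CULL_MODE_BACK_BIT);
    vkCmdSetFrontFaceEXT(cmdbuf, VK_FRONT_FACE_COUNTER_CLOCKWISE);
    vkCmdSetDepthTestEnableEXT(cmdbuf, VK_TRUE);
    vkCmdSetDepthWriteEnableEXT(cmdbuf, VK_TRUE);
    vkCmdSetDepthCompareOpEXT(cmdbuf, VK_COMPARE_OP_GREATER_OR_EQUAL);
    vkCmdSetStencilTestEnableEXT(cmdbuf, VK_FALSE);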
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index a1d699a6c..58aa35efd 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -19,7 +19,27 @@ namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-struct GraphicsPipelineCacheKey;
+struct GraphicsPipelineCacheKey {
+ RenderPassParams renderpass_params;
+ u32 padding;
+ std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
+ FixedPipelineState fixed_state;
+
+ std::size_t Hash() const noexcept;
+
+ bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
+
+ bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
+ return !operator==(rhs);
+ }
+
+ std::size_t Size() const noexcept {
+ return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
+ }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
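The three static_asserts are what make byte-wise hashing and comparison of the key legal: no padding holes (the explicit `u32 padding` member presumably exists to keep object representations unique), memcpy-safe, and trivially constructible for the cache map. A sketch of the std::hash specialization such a key is typically paired with; the real specialization is not shown in this diff:

    namespace std {
    template <>
    struct hash<Vulkan::GraphicsPipelineCacheKey> {
        std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
            return k.Hash(); // Hash() digests the key's raw bytes; see the .cpp hunk below.
        }
    };
    } // namespace std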
class VKDescriptorPool;
class VKDevice;
@@ -54,6 +74,10 @@ public:
return renderpass;
}
+ GraphicsPipelineCacheKey GetCacheKey() const {
+ return cache_key;
+ }
+
private:
vk::DescriptorSetLayout CreateDescriptorSetLayout(
vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
@@ -70,7 +94,7 @@ private:
const VKDevice& device;
VKScheduler& scheduler;
- const FixedPipelineState fixed_state;
+ const GraphicsPipelineCacheKey cache_key;
const u64 hash;
vk::DescriptorSetLayout descriptor_set_layout;
diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp
index 9bceb3861..1c418ea17 100644
--- a/src/video_core/renderer_vulkan/vk_image.cpp
+++ b/src/video_core/renderer_vulkan/vk_image.cpp
@@ -102,21 +102,29 @@ bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num
void VKImage::CreatePresentView() {
// Image type has to be 2D to be presented.
- VkImageViewCreateInfo image_view_ci;
- image_view_ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
- image_view_ci.pNext = nullptr;
- image_view_ci.flags = 0;
- image_view_ci.image = *image;
- image_view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D;
- image_view_ci.format = format;
- image_view_ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
- VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
- image_view_ci.subresourceRange.aspectMask = aspect_mask;
- image_view_ci.subresourceRange.baseMipLevel = 0;
- image_view_ci.subresourceRange.levelCount = 1;
- image_view_ci.subresourceRange.baseArrayLayer = 0;
- image_view_ci.subresourceRange.layerCount = 1;
- present_view = device.GetLogical().CreateImageView(image_view_ci);
+ present_view = device.GetLogical().CreateImageView({
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .image = *image,
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = format,
+ .components =
+ {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ },
+ .subresourceRange =
+ {
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ });
}
VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept {
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
new file mode 100644
index 000000000..ae26e558d
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -0,0 +1,56 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <atomic>
+#include <chrono>
+
+#include "core/settings.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Vulkan {
+
+using namespace std::chrono_literals;
+
+MasterSemaphore::MasterSemaphore(const VKDevice& device) {
+ static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
+ .pNext = nullptr,
+ .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR,
+ .initialValue = 0,
+ };
+ static constexpr VkSemaphoreCreateInfo semaphore_ci{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ .pNext = &semaphore_type_ci,
+ .flags = 0,
+ };
+ semaphore = device.GetLogical().CreateSemaphore(semaphore_ci);
+
+ if (!Settings::values.renderer_debug) {
+ return;
+ }
+ // Validation layers have a bug where they fail to track resource usage when using timeline
+    // semaphores and synchronizing with GetSemaphoreCounterValueKHR. To work around this issue,
+    // have a separate thread waiting for each timeline semaphore value.
+ debug_thread = std::thread([this] {
+ u64 counter = 0;
+ while (!shutdown) {
+ if (semaphore.Wait(counter, 10'000'000)) {
+ ++counter;
+ }
+ }
+ });
+}
+
+MasterSemaphore::~MasterSemaphore() {
+ shutdown = true;
+
+ // This thread might not be started
+ if (debug_thread.joinable()) {
+ debug_thread.join();
+ }
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
new file mode 100644
index 000000000..0e93706d7
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -0,0 +1,70 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <thread>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Vulkan {
+
+class VKDevice;
+
+class MasterSemaphore {
+public:
+ explicit MasterSemaphore(const VKDevice& device);
+ ~MasterSemaphore();
+
+ /// Returns the current logical tick.
+ [[nodiscard]] u64 CurrentTick() const noexcept {
+ return current_tick;
+ }
+
+ /// Returns the timeline semaphore handle.
+ [[nodiscard]] VkSemaphore Handle() const noexcept {
+ return *semaphore;
+ }
+
+ /// Returns true when a tick has been hit by the GPU.
+ [[nodiscard]] bool IsFree(u64 tick) {
+ return gpu_tick >= tick;
+ }
+
+    /// Advances to the next logical tick.
+ void NextTick() noexcept {
+ ++current_tick;
+ }
+
+    /// Refreshes the known GPU tick.
+ void Refresh() {
+ gpu_tick = semaphore.GetCounter();
+ }
+
+    /// Waits for a tick to be hit on the GPU.
+ void Wait(u64 tick) {
+ // No need to wait if the GPU is ahead of the tick
+ if (IsFree(tick)) {
+ return;
+ }
+ // Update the GPU tick and try again
+ Refresh();
+ if (IsFree(tick)) {
+ return;
+ }
+ // If none of the above is hit, fallback to a regular wait
+ semaphore.Wait(tick);
+ }
+
+private:
+ vk::Semaphore semaphore; ///< Timeline semaphore.
+ std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
+ std::atomic<u64> current_tick{1}; ///< Current logical tick.
+ std::atomic<bool> shutdown{false}; ///< True when the object is being destroyed.
+ std::thread debug_thread; ///< Debug thread to workaround validation layer bugs.
+};
+
+} // namespace Vulkan
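A typical lifecycle under this timeline-semaphore model, sketched from the interface above (the submit step is paraphrased; in practice the scheduler owns it):

    MasterSemaphore master{device};
    const u64 tick = master.CurrentTick(); // Tag resources used by this batch.
    // ... submit a command buffer that signals `tick` on master.Handle() ...
    master.NextTick();
    // Before reusing anything tagged with `tick`:
    master.Wait(tick); // Returns immediately once gpu_tick >= tick.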
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index b4c650a63..24c8960ac 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -178,13 +178,12 @@ bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 t
}();
// Try to allocate found type.
- VkMemoryAllocateInfo memory_ai;
- memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
- memory_ai.pNext = nullptr;
- memory_ai.allocationSize = size;
- memory_ai.memoryTypeIndex = type;
-
- vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory(memory_ai);
+ vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .allocationSize = size,
+ .memoryTypeIndex = type,
+ });
if (!memory) {
LOG_CRITICAL(Render_Vulkan, "Device allocation failed!");
return false;
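This hunk is representative of a mechanical change applied across the series: Vulkan create-info structs move from declare-then-assign to C++20 designated initializers. Besides being shorter, the aggregate form cannot leave a member uninitialized, since any field omitted from the initializer list is value-initialized. A minimal illustration:

    // Declare-then-assign: dropping one assignment leaves that member
    // indeterminate (here, pNext would be garbage if its line were removed).
    VkMemoryAllocateInfo a;
    a.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    a.pNext = nullptr;
    a.allocationSize = 4096;
    a.memoryTypeIndex = 0;

    // Designated initializers (C++20): members must appear in declaration
    // order, and any omitted member is value-initialized to zero/null.
    const VkMemoryAllocateInfo b{
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = nullptr,
        .allocationSize = 4096,
        .memoryTypeIndex = 0,
    };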
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 65a1c6245..5c038f4bc 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -27,6 +27,8 @@
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader/compiler_settings.h"
#include "video_core/shader/memory_util.h"
+#include "video_core/shader_cache.h"
+#include "video_core/shader_notify.h"
namespace Vulkan {
@@ -45,6 +47,7 @@ constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
@@ -86,12 +89,13 @@ void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& bindi
// Combined image samplers can be arrayed.
count = container[i].size;
}
- VkDescriptorSetLayoutBinding& entry = bindings.emplace_back();
- entry.binding = binding++;
- entry.descriptorType = descriptor_type;
- entry.descriptorCount = count;
- entry.stageFlags = stage_flags;
- entry.pImmutableSamplers = nullptr;
+ bindings.push_back({
+ .binding = binding++,
+ .descriptorType = descriptor_type,
+ .descriptorCount = count,
+ .stageFlags = stage_flags,
+ .pImmutableSamplers = nullptr,
+ });
}
}
@@ -104,8 +108,9 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
u32 binding = base_binding;
AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
- AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers);
+ AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
+ AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
return binding;
}
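FillDescriptorLayout appends bindings in a fixed order (const buffers, global buffers, uniform texels, samplers, storage texels, images) so that descriptor binding indices match what the SPIR-V decompiler emits. The accumulated vector ultimately feeds descriptor set layout creation; a sketch of that last step in the raw API (the real code goes through the project's wrapper):

    VkDescriptorSetLayout MakeLayout(
        VkDevice device, const std::vector<VkDescriptorSetLayoutBinding>& bindings) {
        const VkDescriptorSetLayoutCreateInfo ci{
            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
            .pNext = nullptr,
            .flags = 0,
            .bindingCount = static_cast<uint32_t>(bindings.size()),
            .pBindings = bindings.data(),
        };
        VkDescriptorSetLayout layout = VK_NULL_HANDLE;
        vkCreateDescriptorSetLayout(device, &ci, nullptr, &layout);
        return layout;
    }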
@@ -113,12 +118,12 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
} // Anonymous namespace
std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
- const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+ const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
return static_cast<std::size_t>(hash);
}
bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
- return std::memcmp(&rhs, this, sizeof *this) == 0;
+ return std::memcmp(&rhs, this, Size()) == 0;
}
std::size_t ComputePipelineCacheKey::Hash() const noexcept {
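Hashing and comparing a key by its raw bytes is only sound when the type is trivially copyable and has unique object representations (no padding bits that memcmp could see but that equality semantics should ignore); the headers enforce this with static_asserts. The pattern, distilled (PodKey is illustrative; Common::CityHash64 is the project's byte hash, and u64/u32 are its common_types aliases):

    struct PodKey {
        u64 shader;
        u32 shared_memory_size;
        u32 padding; // explicit padding keeps object representations unique

        std::size_t Hash() const noexcept {
            return static_cast<std::size_t>(
                Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
        }
        bool operator==(const PodKey& rhs) const noexcept {
            return std::memcmp(this, &rhs, sizeof *this) == 0;
        }
    };
    static_assert(std::has_unique_object_representations_v<PodKey>);
    static_assert(std::is_trivially_copyable_v<PodKey>);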
@@ -130,92 +135,106 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
return std::memcmp(&rhs, this, sizeof *this) == 0;
}
-CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
- GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
- u32 main_offset)
- : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
- registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
- compiler_settings, registry},
- entries{GenerateShaderEntries(shader_ir)} {}
-
-CachedShader::~CachedShader() = default;
-
-Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
- Core::System& system, Tegra::Engines::ShaderType stage) {
- if (stage == Tegra::Engines::ShaderType::Compute) {
- return system.GPU().KeplerCompute();
- } else {
- return system.GPU().Maxwell3D();
- }
-}
-
-VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue,
- VKRenderPassCache& renderpass_cache)
- : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
- descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
- renderpass_cache{renderpass_cache} {}
+Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine, Tegra::Engines::ShaderType stage,
+ GPUVAddr gpu_addr_, VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code_,
+ u32 main_offset)
+ : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage, engine),
+ shader_ir(program_code, main_offset, compiler_settings, registry),
+ entries(GenerateShaderEntries(shader_ir)) {}
+
+Shader::~Shader() = default;
+
+VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,
+ VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
+ VKRenderPassCache& renderpass_cache_)
+ : VideoCommon::ShaderCache<Shader>{rasterizer}, gpu{gpu_}, maxwell3d{maxwell3d_},
+ kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
+ scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
+ update_descriptor_queue{update_descriptor_queue_}, renderpass_cache{renderpass_cache_} {}
VKPipelineCache::~VKPipelineCache() = default;
-std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
- const auto& gpu = system.GPU().Maxwell3D();
+std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
+ std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
- std::array<Shader, Maxwell::MaxShaderProgram> shaders;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
- if (!gpu.regs.IsShaderConfigEnabled(index)) {
+ if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
continue;
}
- auto& memory_manager{system.GPU().MemoryManager()};
- const GPUVAddr program_addr{GetShaderAddress(system, program)};
- const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+ const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)};
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
- auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
- if (!shader) {
- const auto host_ptr{memory_manager.GetPointer(program_addr)};
+
+ Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
+ if (!result) {
+ const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)};
// No shader found - create a new one
- constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
- const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
- ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+ static constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
+ const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
+ ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false);
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
+
+ auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr,
+ std::move(code), stage_offset);
+ result = shader.get();
- shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
- std::move(code), stage_offset);
if (cpu_addr) {
- Register(shader);
+ Register(std::move(shader), *cpu_addr, size_in_bytes);
} else {
- null_shader = shader;
+ null_shader = std::move(shader);
}
}
- shaders[index] = std::move(shader);
+ shaders[index] = result;
}
return last_shaders = shaders;
}
-VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
+VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
+ const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
if (last_graphics_pipeline && last_graphics_key == key) {
- return *last_graphics_pipeline;
+ return last_graphics_pipeline;
}
last_graphics_key = key;
+ if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) {
+ std::unique_lock lock{pipeline_cache};
+ const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
+ if (is_cache_miss) {
+ gpu.ShaderNotify().MarkSharderBuilding();
+ LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
+ const auto [program, bindings] = DecompileShaders(key.fixed_state);
+ async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
+ update_descriptor_queue, renderpass_cache, bindings,
+ program, key);
+ }
+ last_graphics_pipeline = pair->second.get();
+ return last_graphics_pipeline;
+ }
+
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
auto& entry = pair->second;
if (is_cache_miss) {
+ gpu.ShaderNotify().MarkSharderBuilding();
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
- const auto [program, bindings] = DecompileShaders(key);
+ const auto [program, bindings] = DecompileShaders(key.fixed_state);
entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, renderpass_cache, key,
bindings, program);
+ gpu.ShaderNotify().MarkShaderComplete();
}
- return *(last_graphics_pipeline = entry.get());
+ last_graphics_pipeline = entry.get();
+ return last_graphics_pipeline;
}
VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
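In the asynchronous path above, try_emplace inserts a null pipeline entry on a miss and queues compilation on a worker; Draw() later tolerates the null result by skipping the draw until EmplacePipeline() fills the slot under the same mutex. The lookup pattern in isolation (a sketch; assumes the worker locks the same mutex before storing):

    VKGraphicsPipeline* Lookup(
        std::mutex& mutex,
        std::unordered_map<GraphicsPipelineCacheKey,
                           std::unique_ptr<VKGraphicsPipeline>>& cache,
        const GraphicsPipelineCacheKey& key) {
        std::unique_lock lock{mutex};
        const auto [it, is_miss] = cache.try_emplace(key);
        if (is_miss) {
            // Queue compilation on a worker thread here; it->second stays
            // null until EmplacePipeline() stores the finished pipeline.
        }
        return it->second.get(); // may be nullptr while compilation runs
    }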
@@ -228,32 +247,39 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
}
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
- auto& memory_manager = system.GPU().MemoryManager();
- const auto program_addr = key.shader;
+ const GPUVAddr gpu_addr = key.shader;
- const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
- auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
+ Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
if (!shader) {
// No shader found - create a new one
- const auto host_ptr = memory_manager.GetPointer(program_addr);
+ const auto host_ptr = gpu_memory.GetPointer(gpu_addr);
+
+ ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true);
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
+
+ auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr,
+ *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET);
+ shader = shader_info.get();
- ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
- shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
- program_addr, *cpu_addr, std::move(code),
- KERNEL_MAIN_OFFSET);
if (cpu_addr) {
- Register(shader);
+ Register(std::move(shader_info), *cpu_addr, size_in_bytes);
} else {
- null_kernel = shader;
+ null_kernel = std::move(shader_info);
}
}
- Specialization specialization;
- specialization.workgroup_size = key.workgroup_size;
- specialization.shared_memory_size = key.shared_memory_size;
-
+ const Specialization specialization{
+ .base_binding = 0,
+ .workgroup_size = key.workgroup_size,
+ .shared_memory_size = key.shared_memory_size,
+ .point_size = std::nullopt,
+ .enabled_attributes = {},
+ .attribute_types = {},
+ .ndc_minus_one_to_one = false,
+ };
const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
shader->GetRegistry(), specialization),
shader->GetEntries()};
@@ -262,7 +288,13 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
return *entry;
}
-void VKPipelineCache::Unregister(const Shader& shader) {
+void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
+ gpu.ShaderNotify().MarkShaderComplete();
+ std::unique_lock lock{pipeline_cache};
+ graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
+}
+
+void VKPipelineCache::OnShaderRemoval(Shader* shader) {
bool finished = false;
const auto Finish = [&] {
// TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
@@ -294,29 +326,24 @@ void VKPipelineCache::Unregister(const Shader& shader) {
Finish();
it = compute_cache.erase(it);
}
-
- RasterizerCache::Unregister(shader);
}
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
-VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
- const auto& fixed_state = key.fixed_state;
- auto& memory_manager = system.GPU().MemoryManager();
- const auto& gpu = system.GPU().Maxwell3D();
-
+VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
Specialization specialization;
- if (fixed_state.rasterizer.Topology() == Maxwell::PrimitiveTopology::Points) {
+ if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points ||
+ device.IsExtExtendedDynamicStateSupported()) {
float point_size;
- std::memcpy(&point_size, &fixed_state.rasterizer.point_size, sizeof(float));
+ std::memcpy(&point_size, &fixed_state.point_size, sizeof(float));
specialization.point_size = point_size;
ASSERT(point_size != 0.0f);
}
for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
- const auto& attribute = fixed_state.vertex_input.attributes[i];
+ const auto& attribute = fixed_state.attributes[i];
specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
specialization.attribute_types[i] = attribute.Type();
}
- specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
+ specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one;
SPIRVProgram program;
std::vector<VkDescriptorSetLayoutBinding> bindings;
@@ -325,17 +352,16 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
// Skip stages that are not enabled
- if (!gpu.regs.IsShaderConfigEnabled(index)) {
+ if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
continue;
}
- const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
- const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
- const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
- ASSERT(shader);
+ const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
- const auto program_type = GetShaderType(program_enum);
+ const ShaderType program_type = GetShaderType(program_enum);
const auto& entries = shader->GetEntries();
program[stage] = {
Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
@@ -363,13 +389,14 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3
if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) {
for (u32 i = 0; i < count; ++i) {
const u32 num_samplers = container[i].size;
- VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
- entry.dstBinding = binding;
- entry.dstArrayElement = 0;
- entry.descriptorCount = num_samplers;
- entry.descriptorType = descriptor_type;
- entry.offset = offset;
- entry.stride = entry_size;
+ template_entries.push_back({
+ .dstBinding = binding,
+ .dstArrayElement = 0,
+ .descriptorCount = num_samplers,
+ .descriptorType = descriptor_type,
+ .offset = offset,
+ .stride = entry_size,
+ });
++binding;
offset += num_samplers * entry_size;
@@ -377,26 +404,29 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3
return;
}
- if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) {
- // Nvidia has a bug where updating multiple uniform texels at once causes the driver to
- // crash.
+ if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER ||
+ descriptor_type == STORAGE_TEXEL_BUFFER) {
+ // Nvidia has a bug where updating multiple texel buffers at once causes the driver to crash.
+ // Note: Fixed in driver Windows 443.24, Linux 440.66.15
for (u32 i = 0; i < count; ++i) {
- VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
- entry.dstBinding = binding + i;
- entry.dstArrayElement = 0;
- entry.descriptorCount = 1;
- entry.descriptorType = descriptor_type;
- entry.offset = offset + i * entry_size;
- entry.stride = entry_size;
+ template_entries.push_back({
+ .dstBinding = binding + i,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = descriptor_type,
+ .offset = static_cast<std::size_t>(offset + i * entry_size),
+ .stride = entry_size,
+ });
}
} else if (count > 0) {
- VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
- entry.dstBinding = binding;
- entry.dstArrayElement = 0;
- entry.descriptorCount = count;
- entry.descriptorType = descriptor_type;
- entry.offset = offset;
- entry.stride = entry_size;
+ template_entries.push_back({
+ .dstBinding = binding,
+ .dstArrayElement = 0,
+ .descriptorCount = count,
+ .descriptorType = descriptor_type,
+ .offset = offset,
+ .stride = entry_size,
+ });
}
offset += count * entry_size;
binding += count;
@@ -407,8 +437,9 @@ void FillDescriptorUpdateTemplateEntries(
std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers);
AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers);
- AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers);
+ AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels);
AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers);
+ AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels);
AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images);
}
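FillDescriptorUpdateTemplateEntries mirrors the binding order used for the layout, and the per-buffer entries for texel buffers exist solely to sidestep the Nvidia crash noted in the hunk above. The finished entry list is what a VkDescriptorUpdateTemplate is built from; roughly (a sketch, with `layout` standing in for the set layout built from the same bindings):

    const VkDescriptorUpdateTemplateCreateInfoKHR ci{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
        .pNext = nullptr,
        .flags = 0,
        .descriptorUpdateEntryCount = static_cast<uint32_t>(template_entries.size()),
        .pDescriptorUpdateEntries = template_entries.data(),
        .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
        .descriptorSetLayout = layout,
        // pipelineBindPoint, pipelineLayout and set are ignored for
        // DESCRIPTOR_SET templates and value-initialize to zero.
    };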
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 0b5796fef..e558e6658 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -17,15 +17,16 @@
#include "common/common_types.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader/async_shaders.h"
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
namespace Core {
class System;
@@ -37,32 +38,11 @@ class RasterizerVulkan;
class VKComputePipeline;
class VKDescriptorPool;
class VKDevice;
-class VKFence;
class VKScheduler;
class VKUpdateDescriptorQueue;
-class CachedShader;
-using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-struct GraphicsPipelineCacheKey {
- FixedPipelineState fixed_state;
- RenderPassParams renderpass_params;
- std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
- u64 padding; // This is necessary for unique object representations
-
- std::size_t Hash() const noexcept;
-
- bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
-
- bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
- return !operator==(rhs);
- }
-};
-static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
-
struct ComputePipelineCacheKey {
GPUVAddr shader;
u32 shared_memory_size;
@@ -102,22 +82,22 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
namespace Vulkan {
-class CachedShader final : public RasterizerCacheObject {
+class Shader {
public:
- explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
- VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code,
- u32 main_offset);
- ~CachedShader();
+ explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine,
+ Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, VAddr cpu_addr,
+ VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
+ ~Shader();
GPUVAddr GetGpuAddr() const {
return gpu_addr;
}
- std::size_t GetSizeInBytes() const override {
- return program_code.size() * sizeof(u64);
+ VideoCommon::Shader::ShaderIR& GetIR() {
+ return shader_ir;
}
- VideoCommon::Shader::ShaderIR& GetIR() {
+ const VideoCommon::Shader::ShaderIR& GetIR() const {
return shader_ir;
}
@@ -125,18 +105,11 @@ public:
return registry;
}
- const VideoCommon::Shader::ShaderIR& GetIR() const {
- return shader_ir;
- }
-
const ShaderEntries& GetEntries() const {
return entries;
}
private:
- static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
- Tegra::Engines::ShaderType stage);
-
GPUVAddr gpu_addr{};
VideoCommon::Shader::ProgramCode program_code;
VideoCommon::Shader::Registry registry;
@@ -144,45 +117,53 @@ private:
ShaderEntries entries;
};
-class VKPipelineCache final : public RasterizerCache<Shader> {
+class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
public:
- explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool,
+ explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
+ Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::Engines::KeplerCompute& kepler_compute,
+ Tegra::MemoryManager& gpu_memory, const VKDevice& device,
+ VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache);
- ~VKPipelineCache();
+ ~VKPipelineCache() override;
- std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
+ std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
- VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
+ VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
+ VideoCommon::Shader::AsyncShaders& async_shaders);
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
-protected:
- void Unregister(const Shader& shader) override;
+ void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline);
- void FlushObjectInner(const Shader& object) override {}
+protected:
+ void OnShaderRemoval(Shader* shader) final;
private:
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
- const GraphicsPipelineCacheKey& key);
+ const FixedPipelineState& fixed_state);
+
+ Tegra::GPU& gpu;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
- Core::System& system;
const VKDevice& device;
VKScheduler& scheduler;
VKDescriptorPool& descriptor_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
VKRenderPassCache& renderpass_cache;
- Shader null_shader{};
- Shader null_kernel{};
+ std::unique_ptr<Shader> null_shader;
+ std::unique_ptr<Shader> null_kernel;
- std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+ std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
GraphicsPipelineCacheKey last_graphics_key;
VKGraphicsPipeline* last_graphics_pipeline = nullptr;
+ std::mutex pipeline_cache;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
graphics_cache;
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index bc91c48cc..ee2d871e3 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -9,35 +9,33 @@
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
+using VideoCore::QueryType;
+
namespace {
constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION};
-constexpr VkQueryType GetTarget(VideoCore::QueryType type) {
+constexpr VkQueryType GetTarget(QueryType type) {
return QUERY_TARGETS[static_cast<std::size_t>(type)];
}
} // Anonymous namespace
-QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {}
+QueryPool::QueryPool(const VKDevice& device_, VKScheduler& scheduler, QueryType type_)
+ : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {}
QueryPool::~QueryPool() = default;
-void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) {
- device = &device_;
- type = type_;
-}
-
-std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) {
+std::pair<VkQueryPool, u32> QueryPool::Commit() {
std::size_t index;
do {
- index = CommitResource(fence);
+ index = CommitResource();
} while (usage[index]);
usage[index] = true;
@@ -47,14 +45,14 @@ std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) {
void QueryPool::Allocate(std::size_t begin, std::size_t end) {
usage.resize(end);
- VkQueryPoolCreateInfo query_pool_ci;
- query_pool_ci.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
- query_pool_ci.pNext = nullptr;
- query_pool_ci.flags = 0;
- query_pool_ci.queryType = GetTarget(type);
- query_pool_ci.queryCount = static_cast<u32>(end - begin);
- query_pool_ci.pipelineStatistics = 0;
- pools.push_back(device->GetLogical().CreateQueryPool(query_pool_ci));
+ pools.push_back(device.GetLogical().CreateQueryPool({
+ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .queryType = GetTarget(type),
+ .queryCount = static_cast<u32>(end - begin),
+ .pipelineStatistics = 0,
+ }));
}
void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
@@ -68,30 +66,39 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
}
-VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
const VKDevice& device, VKScheduler& scheduler)
- : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
- QueryPool>{system, rasterizer},
- device{device}, scheduler{scheduler} {
- for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) {
- query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i));
+ : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream,
+ HostCounter>{rasterizer, maxwell3d, gpu_memory},
+ device{device}, scheduler{scheduler}, query_pools{
+ QueryPool{device, scheduler,
+ QueryType::SamplesPassed},
+ } {}
+
+VKQueryCache::~VKQueryCache() {
+ // TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class
+ // destructor is called. The query cache should be redesigned to have a proper ownership model
+ // instead of using shared pointers.
+ for (size_t query_type = 0; query_type < VideoCore::NumQueryTypes; ++query_type) {
+ auto& stream = Stream(static_cast<QueryType>(query_type));
+ stream.Update(false);
+ stream.Reset();
}
}
-VKQueryCache::~VKQueryCache() = default;
-
-std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(VideoCore::QueryType type) {
- return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence());
+std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(QueryType type) {
+ return query_pools[static_cast<std::size_t>(type)].Commit();
}
-void VKQueryCache::Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query) {
+void VKQueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) {
query_pools[static_cast<std::size_t>(type)].Reserve(query);
}
HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
- VideoCore::QueryType type)
+ QueryType type)
: VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache},
- type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} {
+ type{type}, query{cache.AllocateQuery(type)}, tick{cache.Scheduler().CurrentTick()} {
const vk::Device* logical = &cache.Device().GetLogical();
cache.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
logical->ResetQueryPoolEXT(query.first, query.second, 1);
@@ -109,7 +116,7 @@ void HostCounter::EndQuery() {
}
u64 HostCounter::BlockingQuery() const {
- if (ticks >= cache.Scheduler().Ticks()) {
+ if (tick >= cache.Scheduler().CurrentTick()) {
cache.Scheduler().Flush();
}
u64 data;
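BlockingQuery only flushes the scheduler when the counter's tick has not been submitted yet; once the work is in flight, the readback can simply wait on the GPU. In raw Vulkan terms the read that follows is a waiting vkGetQueryPoolResults call (a sketch):

    // Read one 64-bit occlusion result, blocking until the GPU wrote it.
    u64 ReadQueryResult(VkDevice device, VkQueryPool pool, u32 query) {
        u64 data = 0;
        vkGetQueryPoolResults(device, pool, query, 1, sizeof(data), &data,
                              sizeof(data),
                              VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
        return data;
    }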
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index 40119e6d3..2e57fb75d 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -11,7 +11,7 @@
#include "common/common_types.h"
#include "video_core/query_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace VideoCore {
@@ -28,14 +28,12 @@ class VKScheduler;
using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
-class QueryPool final : public VKFencedPool {
+class QueryPool final : public ResourcePool {
public:
- explicit QueryPool();
+ explicit QueryPool(const VKDevice& device, VKScheduler& scheduler, VideoCore::QueryType type);
~QueryPool() override;
- void Initialize(const VKDevice& device, VideoCore::QueryType type);
-
- std::pair<VkQueryPool, u32> Commit(VKFence& fence);
+ std::pair<VkQueryPool, u32> Commit();
void Reserve(std::pair<VkQueryPool, u32> query);
@@ -45,18 +43,18 @@ protected:
private:
static constexpr std::size_t GROW_STEP = 512;
- const VKDevice* device = nullptr;
- VideoCore::QueryType type = {};
+ const VKDevice& device;
+ const VideoCore::QueryType type;
std::vector<vk::QueryPool> pools;
std::vector<bool> usage;
};
class VKQueryCache final
- : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
- QueryPool> {
+ : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter> {
public:
- explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+ explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
const VKDevice& device, VKScheduler& scheduler);
~VKQueryCache();
@@ -75,6 +73,7 @@ public:
private:
const VKDevice& device;
VKScheduler& scheduler;
+ std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
};
class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
@@ -91,7 +90,7 @@ private:
VKQueryCache& cache;
const VideoCore::QueryType type;
const std::pair<VkQueryPool, u32> query;
- const u64 ticks;
+ const u64 tick;
};
class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
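QueryPool layers a usage bitmap over ResourcePool: CommitResource() hands back an index whose GPU tick has elapsed, but the slot may still be held by a live CachedQuery until Reserve() clears it, hence the do/while retry in Commit(). The flat index maps onto (VkQueryPool, slot) pairs in GROW_STEP chunks, as implied by the arithmetic in Reserve(); a sketch of that mapping (IndexToQuery is a hypothetical helper):

    constexpr std::size_t GROW_STEP = 512; // one VkQueryPool per GROW_STEP slots

    std::pair<VkQueryPool, u32> IndexToQuery(const std::vector<vk::QueryPool>& pools,
                                             std::size_t index) {
        return {*pools[index / GROW_STEP], static_cast<u32>(index % GROW_STEP)};
    }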
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index a3d992ed3..f3c2483c8 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -14,6 +14,7 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
+#include "common/scope_exit.h"
#include "core/core.h"
#include "core/settings.h"
#include "video_core/engines/kepler_compute.h"
@@ -30,7 +31,6 @@
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -38,6 +38,7 @@
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader_cache.h"
namespace Vulkan {
@@ -63,20 +64,22 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si
const auto& src = regs.viewport_transform[index];
const float width = src.scale_x * 2.0f;
const float height = src.scale_y * 2.0f;
+ const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
- VkViewport viewport;
- viewport.x = src.translate_x - src.scale_x;
- viewport.y = src.translate_y - src.scale_y;
- viewport.width = width != 0.0f ? width : 1.0f;
- viewport.height = height != 0.0f ? height : 1.0f;
+ VkViewport viewport{
+ .x = src.translate_x - src.scale_x,
+ .y = src.translate_y - src.scale_y,
+ .width = width != 0.0f ? width : 1.0f,
+ .height = height != 0.0f ? height : 1.0f,
+ .minDepth = src.translate_z - src.scale_z * reduce_z,
+ .maxDepth = src.translate_z + src.scale_z,
+ };
- const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
- viewport.minDepth = src.translate_z - src.scale_z * reduce_z;
- viewport.maxDepth = src.translate_z + src.scale_z;
if (!device.IsExtDepthRangeUnrestrictedSupported()) {
viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f);
viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f);
}
+
return viewport;
}
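The reduce_z term folds both depth conventions into one pair of formulas. With the hardware mapping z' = translate_z + scale_z * z, a worked check:

    // ZeroToOne:     z in [ 0, 1] -> depth range [translate_z,           translate_z + scale_z]
    // MinusOneToOne: z in [-1, 1] -> depth range [translate_z - scale_z, translate_z + scale_z]
    //
    // Hence minDepth = translate_z - scale_z * reduce_z, where reduce_z is 1
    // only for MinusOneToOne, and maxDepth = translate_z + scale_z either way.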
@@ -98,7 +101,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
}
std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
- const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
+ const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
for (std::size_t i = 0; i < std::size(addresses); ++i) {
addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
@@ -117,6 +120,17 @@ template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
std::size_t stage, std::size_t index = 0) {
const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
+ if constexpr (std::is_same_v<Entry, SamplerEntry>) {
+ if (entry.is_separated) {
+ const u32 buffer_1 = entry.buffer;
+ const u32 buffer_2 = entry.secondary_buffer;
+ const u32 offset_1 = entry.offset;
+ const u32 offset_2 = entry.secondary_offset;
+ const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1);
+ const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2);
+ return engine.GetTextureInfo(handle_1 | handle_2);
+ }
+ }
if (entry.is_bindless) {
const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
return engine.GetTextureInfo(tex_handle);
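For separated samplers, the texture and sampler halves of the handle live in two different const buffer slots; the two words are expected to occupy disjoint bit ranges, so a bitwise OR of the two 32-bit reads reconstructs the combined handle, which is what the is_separated branch above does. Schematically:

    // The two const buffer words hold disjoint bit ranges of the handle,
    // so OR merges them losslessly into the value GetTextureInfo expects.
    u32 CombineSeparatedHandle(u32 texture_word, u32 sampler_word) {
        return texture_word | sampler_word;
    }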
@@ -131,13 +145,65 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
}
}
+/// @brief Determine if a color attachment to be updated has to preserve contents
+/// @param is_clear True when a clear is being executed
+/// @param regs 3D registers
+/// @return True when the contents have to be preserved
+bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) {
+ if (!is_clear) {
+ return true;
+ }
+ // First we have to make sure all clear masks are enabled.
+ if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B ||
+ !regs.clear_buffers.A) {
+ return true;
+ }
+ // If scissors are disabled, the whole screen is cleared
+ if (!regs.clear_flags.scissor) {
+ return false;
+ }
+ // Then we have to confirm the scissor test covers the whole image
+ const std::size_t index = regs.clear_buffers.RT;
+ const auto& scissor = regs.scissor_test[0];
+ return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width ||
+ scissor.max_y < regs.rt[index].height;
+}
+
+/// @brief Determine if the depth/stencil attachment to be updated has to preserve contents
+/// @param is_clear True when a clear is being executed
+/// @param regs 3D registers
+/// @return True when the contents have to be preserved
+bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) {
+ // If we are not clearing, the contents have to be preserved
+ if (!is_clear) {
+ return true;
+ }
+ // For depth/stencil clears we only have to confirm the scissor test covers the whole image
+ if (!regs.clear_flags.scissor) {
+ return false;
+ }
+ // Make sure the clear covers the whole image
+ const auto& scissor = regs.scissor_test[0];
+ return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width ||
+ scissor.max_y < regs.zeta_height;
+}
+
+template <std::size_t N>
+std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) {
+ std::array<VkDeviceSize, N> expanded;
+ std::copy(strides.begin(), strides.end(), expanded.begin());
+ return expanded;
+}
+
} // Anonymous namespace
class BufferBindings final {
public:
- void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset) {
+ void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) {
vertex.buffers[vertex.num_buffers] = buffer;
vertex.offsets[vertex.num_buffers] = offset;
+ vertex.sizes[vertex.num_buffers] = size;
+ vertex.strides[vertex.num_buffers] = static_cast<u16>(stride);
++vertex.num_buffers;
}
@@ -147,76 +213,76 @@ public:
index.type = type;
}
- void Bind(VKScheduler& scheduler) const {
+ void Bind(const VKDevice& device, VKScheduler& scheduler) const {
// Use this large switch case to avoid dispatching more memory in the record lambda than
// what we need. It looks horrible, but it's the best we can do on standard C++.
switch (vertex.num_buffers) {
case 0:
- return BindStatic<0>(scheduler);
+ return BindStatic<0>(device, scheduler);
case 1:
- return BindStatic<1>(scheduler);
+ return BindStatic<1>(device, scheduler);
case 2:
- return BindStatic<2>(scheduler);
+ return BindStatic<2>(device, scheduler);
case 3:
- return BindStatic<3>(scheduler);
+ return BindStatic<3>(device, scheduler);
case 4:
- return BindStatic<4>(scheduler);
+ return BindStatic<4>(device, scheduler);
case 5:
- return BindStatic<5>(scheduler);
+ return BindStatic<5>(device, scheduler);
case 6:
- return BindStatic<6>(scheduler);
+ return BindStatic<6>(device, scheduler);
case 7:
- return BindStatic<7>(scheduler);
+ return BindStatic<7>(device, scheduler);
case 8:
- return BindStatic<8>(scheduler);
+ return BindStatic<8>(device, scheduler);
case 9:
- return BindStatic<9>(scheduler);
+ return BindStatic<9>(device, scheduler);
case 10:
- return BindStatic<10>(scheduler);
+ return BindStatic<10>(device, scheduler);
case 11:
- return BindStatic<11>(scheduler);
+ return BindStatic<11>(device, scheduler);
case 12:
- return BindStatic<12>(scheduler);
+ return BindStatic<12>(device, scheduler);
case 13:
- return BindStatic<13>(scheduler);
+ return BindStatic<13>(device, scheduler);
case 14:
- return BindStatic<14>(scheduler);
+ return BindStatic<14>(device, scheduler);
case 15:
- return BindStatic<15>(scheduler);
+ return BindStatic<15>(device, scheduler);
case 16:
- return BindStatic<16>(scheduler);
+ return BindStatic<16>(device, scheduler);
case 17:
- return BindStatic<17>(scheduler);
+ return BindStatic<17>(device, scheduler);
case 18:
- return BindStatic<18>(scheduler);
+ return BindStatic<18>(device, scheduler);
case 19:
- return BindStatic<19>(scheduler);
+ return BindStatic<19>(device, scheduler);
case 20:
- return BindStatic<20>(scheduler);
+ return BindStatic<20>(device, scheduler);
case 21:
- return BindStatic<21>(scheduler);
+ return BindStatic<21>(device, scheduler);
case 22:
- return BindStatic<22>(scheduler);
+ return BindStatic<22>(device, scheduler);
case 23:
- return BindStatic<23>(scheduler);
+ return BindStatic<23>(device, scheduler);
case 24:
- return BindStatic<24>(scheduler);
+ return BindStatic<24>(device, scheduler);
case 25:
- return BindStatic<25>(scheduler);
+ return BindStatic<25>(device, scheduler);
case 26:
- return BindStatic<26>(scheduler);
+ return BindStatic<26>(device, scheduler);
case 27:
- return BindStatic<27>(scheduler);
+ return BindStatic<27>(device, scheduler);
case 28:
- return BindStatic<28>(scheduler);
+ return BindStatic<28>(device, scheduler);
case 29:
- return BindStatic<29>(scheduler);
+ return BindStatic<29>(device, scheduler);
case 30:
- return BindStatic<30>(scheduler);
+ return BindStatic<30>(device, scheduler);
case 31:
- return BindStatic<31>(scheduler);
+ return BindStatic<31>(device, scheduler);
case 32:
- return BindStatic<32>(scheduler);
+ return BindStatic<32>(device, scheduler);
}
UNREACHABLE();
}
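The 33-case switch exists to turn a runtime buffer count into a template argument, so each instantiation captures exactly N handles in the record lambda instead of Maxwell::NumVertexArrays of them. The idiom, reduced to its core (names illustrative):

    // Dispatch a runtime count to a compile-time constant so the lambda
    // captures fixed-size arrays of exactly N elements.
    template <std::size_t N>
    void BindExactly(const std::array<VkBuffer, 32>& all) {
        std::array<VkBuffer, N> buffers;
        std::copy(all.begin(), all.begin() + N, buffers.begin());
        // ... capture `buffers` by value in the command-recording lambda ...
    }

    void Bind(std::size_t count, const std::array<VkBuffer, 32>& all) {
        switch (count) {
        case 0: return BindExactly<0>(all);
        case 1: return BindExactly<1>(all);
        // ... one case per count, up to the hardware maximum of 32 ...
        }
    }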
@@ -227,6 +293,8 @@ private:
std::size_t num_buffers = 0;
std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
+ std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes;
+ std::array<u16, Maxwell::NumVertexArrays> strides;
} vertex;
struct {
@@ -236,15 +304,23 @@ private:
} index;
template <std::size_t N>
- void BindStatic(VKScheduler& scheduler) const {
- if (index.buffer) {
- BindStatic<N, true>(scheduler);
+ void BindStatic(const VKDevice& device, VKScheduler& scheduler) const {
+ if (device.IsExtExtendedDynamicStateSupported()) {
+ if (index.buffer) {
+ BindStatic<N, true, true>(scheduler);
+ } else {
+ BindStatic<N, false, true>(scheduler);
+ }
} else {
- BindStatic<N, false>(scheduler);
+ if (index.buffer) {
+ BindStatic<N, true, false>(scheduler);
+ } else {
+ BindStatic<N, false, false>(scheduler);
+ }
}
}
- template <std::size_t N, bool is_indexed>
+ template <std::size_t N, bool is_indexed, bool has_extended_dynamic_state>
void BindStatic(VKScheduler& scheduler) const {
static_assert(N <= Maxwell::NumVertexArrays);
if constexpr (N == 0) {
@@ -256,6 +332,31 @@ private:
std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin());
std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
+ if constexpr (has_extended_dynamic_state) {
+ // With extended dynamic state we can specify the size and stride of each vertex buffer at bind time
+ std::array<VkDeviceSize, N> sizes;
+ std::array<u16, N> strides;
+ std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin());
+ std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin());
+
+ if constexpr (is_indexed) {
+ scheduler.Record(
+ [buffers, offsets, sizes, strides, index = index](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
+ cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
+ offsets.data(), sizes.data(),
+ ExpandStrides(strides).data());
+ });
+ } else {
+ scheduler.Record([buffers, offsets, sizes, strides](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
+ offsets.data(), sizes.data(),
+ ExpandStrides(strides).data());
+ });
+ }
+ return;
+ }
+
if constexpr (is_indexed) {
// Indexed draw
scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) {
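vkCmdBindVertexBuffers2EXT (VK_EXT_extended_dynamic_state) accepts sizes and strides at record time, which is why the stride can come from the vertex state here rather than being baked into the pipeline, and why FixedPipelineState::Fill now receives the extension flag. The raw call has this shape (a sketch):

    // firstBinding = 0, `count` buffers; pSizes and pStrides are the new
    // parameters relative to plain vkCmdBindVertexBuffers.
    vkCmdBindVertexBuffers2EXT(cmdbuf, 0, count, buffers, offsets, sizes, strides);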
@@ -279,28 +380,32 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
}
}
-RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer,
- VKScreenInfo& screen_info, const VKDevice& device,
- VKResourceManager& resource_manager,
- VKMemoryManager& memory_manager, StateTracker& state_tracker,
- VKScheduler& scheduler)
- : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer},
- screen_info{screen_info}, device{device}, resource_manager{resource_manager},
- memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler},
- staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
- update_descriptor_queue(device, scheduler), renderpass_cache(device),
+RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_,
+ Tegra::MemoryManager& gpu_memory_,
+ Core::Memory::Memory& cpu_memory, VKScreenInfo& screen_info_,
+ const VKDevice& device_, VKMemoryManager& memory_manager_,
+ StateTracker& state_tracker_, VKScheduler& scheduler_)
+ : RasterizerAccelerated(cpu_memory), gpu(gpu_), gpu_memory(gpu_memory_),
+ maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_),
+ device(device_), memory_manager(memory_manager_), state_tracker(state_tracker_),
+ scheduler(scheduler_), staging_pool(device, memory_manager, scheduler),
+ descriptor_pool(device, scheduler_), update_descriptor_queue(device, scheduler),
+ renderpass_cache(device),
quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
- texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
- staging_pool),
- pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
- renderpass_cache),
- buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
- sampler_cache(device),
- fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
- query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} {
+ texture_cache(*this, maxwell3d, gpu_memory, device, memory_manager, scheduler, staging_pool),
+ pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
+ descriptor_pool, update_descriptor_queue, renderpass_cache),
+ buffer_cache(*this, gpu_memory, cpu_memory, device, memory_manager, scheduler, staging_pool),
+ sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler),
+ fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device,
+ scheduler),
+ wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window) {
scheduler.SetQueryCache(query_cache);
+ if (device.UseAsynchronousShaders()) {
+ async_shaders.AllocateWorkers();
+ }
}
RasterizerVulkan::~RasterizerVulkan() = default;
@@ -308,13 +413,13 @@ RasterizerVulkan::~RasterizerVulkan() = default;
void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(Vulkan_Drawing);
+ SCOPE_EXIT({ gpu.TickWork(); });
FlushWork();
query_cache.UpdateCounters();
- const auto& gpu = system.GPU().Maxwell3D();
GraphicsPipelineCacheKey key;
- key.fixed_state.Fill(gpu.regs);
+ key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported());
buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
@@ -332,27 +437,32 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
buffer_cache.Unmap();
- const Texceptions texceptions = UpdateAttachments();
+ const Texceptions texceptions = UpdateAttachments(false);
SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
key.renderpass_params = GetRenderPassParams(texceptions);
key.padding = 0;
- auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
- scheduler.BindGraphicsPipeline(pipeline.GetHandle());
+ auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
+ if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
+ // Async graphics pipeline was not ready.
+ return;
+ }
+
+ scheduler.BindGraphicsPipeline(pipeline->GetHandle());
- const auto renderpass = pipeline.GetRenderPass();
+ const auto renderpass = pipeline->GetRenderPass();
const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
UpdateDynamicStates();
- buffer_bindings.Bind(scheduler);
+ buffer_bindings.Bind(device, scheduler);
BeginTransformFeedback();
- const auto pipeline_layout = pipeline.GetLayout();
- const auto descriptor_set = pipeline.CommitDescriptorSet();
+ const auto pipeline_layout = pipeline->GetLayout();
+ const auto descriptor_set = pipeline->CommitDescriptorSet();
scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
if (descriptor_set) {
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
@@ -362,15 +472,12 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
});
EndTransformFeedback();
-
- system.GPU().TickWork();
}
void RasterizerVulkan::Clear() {
MICROPROFILE_SCOPE(Vulkan_Clearing);
- const auto& gpu = system.GPU().Maxwell3D();
- if (!system.GPU().Maxwell3D().ShouldExecute()) {
+ if (!maxwell3d.ShouldExecute()) {
return;
}
@@ -379,7 +486,7 @@ void RasterizerVulkan::Clear() {
query_cache.UpdateCounters();
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A;
const bool use_depth = regs.clear_buffers.Z;
@@ -388,7 +495,7 @@ void RasterizerVulkan::Clear() {
return;
}
- [[maybe_unused]] const auto texceptions = UpdateAttachments();
+ [[maybe_unused]] const auto texceptions = UpdateAttachments(true);
DEBUG_ASSERT(texceptions.none());
SetupImageTransitions(0, color_attachments, zeta_attachment);
@@ -409,10 +516,11 @@ void RasterizerVulkan::Clear() {
const u32 color_attachment = regs.clear_buffers.RT;
scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
- VkClearAttachment attachment;
- attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
- attachment.colorAttachment = color_attachment;
- attachment.clearValue = clear_value;
+ const VkClearAttachment attachment{
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .colorAttachment = color_attachment,
+ .clearValue = clear_value,
+ };
cmdbuf.ClearAttachments(attachment, clear_rect);
});
}
@@ -430,10 +538,6 @@ void RasterizerVulkan::Clear() {
scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
- VkClearValue clear_value;
- clear_value.depthStencil.depth = clear_depth;
- clear_value.depthStencil.stencil = clear_stencil;
-
VkClearAttachment attachment;
attachment.aspectMask = aspect_flags;
attachment.colorAttachment = 0;
@@ -451,14 +555,17 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
query_cache.UpdateCounters();
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
- ComputePipelineCacheKey key;
- key.shader = code_addr;
- key.shared_memory_size = launch_desc.shared_alloc;
- key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y,
- launch_desc.block_dim_z};
-
- auto& pipeline = pipeline_cache.GetComputePipeline(key);
+ const auto& launch_desc = kepler_compute.launch_description;
+ auto& pipeline = pipeline_cache.GetComputePipeline({
+ .shader = code_addr,
+ .shared_memory_size = launch_desc.shared_alloc,
+ .workgroup_size =
+ {
+ launch_desc.block_dim_x,
+ launch_desc.block_dim_y,
+ launch_desc.block_dim_z,
+ },
+ });
// Compute dispatches can't be executed inside a renderpass
scheduler.RequestOutsideRenderPassOperationContext();
@@ -468,8 +575,9 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
const auto& entries = pipeline.GetEntries();
SetupComputeConstBuffers(entries);
SetupComputeGlobalBuffers(entries);
- SetupComputeTexelBuffers(entries);
+ SetupComputeUniformTexels(entries);
SetupComputeTextures(entries);
+ SetupComputeStorageTexels(entries);
SetupComputeImages(entries);
buffer_cache.Unmap();
@@ -543,16 +651,14 @@ void RasterizerVulkan::SyncGuestHost() {
}
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
- gpu.MemoryManager().Write<u32>(addr, value);
+ gpu_memory.Write<u32>(addr, value);
return;
}
fence_manager.SignalSemaphore(addr, value);
}
void RasterizerVulkan::SignalSyncPoint(u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
gpu.IncrementSyncPoint(value);
return;
@@ -561,7 +667,6 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) {
}
void RasterizerVulkan::ReleaseFences() {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
return;
}
@@ -639,10 +744,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return true;
}
-void RasterizerVulkan::SetupDirtyFlags() {
- state_tracker.Initialize();
-}
-
void RasterizerVulkan::FlushWork() {
static constexpr u32 DRAWS_TO_DISPATCH = 4096;
@@ -664,9 +765,11 @@ void RasterizerVulkan::FlushWork() {
draw_counter = 0;
}
-RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
+RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
MICROPROFILE_SCOPE(Vulkan_RenderTargets);
- auto& dirty = system.GPU().Maxwell3D().dirty.flags;
+
+ const auto& regs = maxwell3d.regs;
+ auto& dirty = maxwell3d.dirty.flags;
const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
dirty[VideoCommon::Dirty::RenderTargets] = false;
@@ -675,7 +778,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
Texceptions texceptions;
for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
if (update_rendertargets) {
- color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
+ const bool preserve_contents = HasToPreserveColorContents(is_clear, regs);
+ color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents);
}
if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
texceptions[rt] = true;
@@ -683,7 +787,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
}
if (update_rendertargets) {
- zeta_attachment = texture_cache.GetDepthBufferSurface(true);
+ const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs);
+ zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents);
}
if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
texceptions[ZETA_TEXCEPTION_INDEX] = true;
@@ -708,21 +813,26 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen
std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
VkRenderPass renderpass) {
- FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(),
- std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
+ FramebufferCacheKey key{
+ .renderpass = renderpass,
+ .width = std::numeric_limits<u32>::max(),
+ .height = std::numeric_limits<u32>::max(),
+ .layers = std::numeric_limits<u32>::max(),
+ .views = {},
+ };
const auto try_push = [&key](const View& view) {
if (!view) {
return false;
}
- key.views.push_back(view->GetHandle());
+ key.views.push_back(view->GetAttachment());
key.width = std::min(key.width, view->GetWidth());
key.height = std::min(key.height, view->GetHeight());
key.layers = std::min(key.layers, view->GetNumLayers());
return true;
};
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
for (std::size_t index = 0; index < num_attachments; ++index) {
if (try_push(color_attachments[index])) {
@@ -736,17 +846,17 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);
auto& framebuffer = fbentry->second;
if (is_cache_miss) {
- VkFramebufferCreateInfo framebuffer_ci;
- framebuffer_ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
- framebuffer_ci.pNext = nullptr;
- framebuffer_ci.flags = 0;
- framebuffer_ci.renderPass = key.renderpass;
- framebuffer_ci.attachmentCount = static_cast<u32>(key.views.size());
- framebuffer_ci.pAttachments = key.views.data();
- framebuffer_ci.width = key.width;
- framebuffer_ci.height = key.height;
- framebuffer_ci.layers = key.layers;
- framebuffer = device.GetLogical().CreateFramebuffer(framebuffer_ci);
+ framebuffer = device.GetLogical().CreateFramebuffer({
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .renderPass = key.renderpass,
+ .attachmentCount = static_cast<u32>(key.views.size()),
+ .pAttachments = key.views.data(),
+ .width = key.width,
+ .height = key.height,
+ .layers = key.layers,
+ });
}
return {*framebuffer, VkExtent2D{key.width, key.height}};
@@ -758,13 +868,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
bool is_instanced) {
MICROPROFILE_SCOPE(Vulkan_Geometry);
- const auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
- SetupVertexArrays(fixed_state.vertex_input, buffer_bindings);
+ SetupVertexArrays(buffer_bindings);
const u32 base_instance = regs.vb_base_instance;
- const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1;
+ const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1;
const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
@@ -775,20 +884,21 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
}
void RasterizerVulkan::SetupShaderDescriptors(
- const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
+ const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
texture_cache.GuardSamplers(true);
for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
// Skip VertexA stage
- const auto& shader = shaders[stage + 1];
+ Shader* const shader = shaders[stage + 1];
if (!shader) {
continue;
}
const auto& entries = shader->GetEntries();
SetupGraphicsConstBuffers(entries, stage);
SetupGraphicsGlobalBuffers(entries, stage);
- SetupGraphicsTexelBuffers(entries, stage);
+ SetupGraphicsUniformTexels(entries, stage);
SetupGraphicsTextures(entries, stage);
+ SetupGraphicsStorageTexels(entries, stage);
SetupGraphicsImages(entries, stage);
}
texture_cache.GuardSamplers(false);
@@ -824,20 +934,35 @@ void RasterizerVulkan::SetupImageTransitions(
}
void RasterizerVulkan::UpdateDynamicStates() {
- auto& regs = system.GPU().Maxwell3D().regs;
+ auto& regs = maxwell3d.regs;
UpdateViewportsState(regs);
UpdateScissorsState(regs);
UpdateDepthBias(regs);
UpdateBlendConstants(regs);
UpdateDepthBounds(regs);
UpdateStencilFaces(regs);
+ if (device.IsExtExtendedDynamicStateSupported()) {
+ UpdateCullMode(regs);
+ UpdateDepthBoundsTestEnable(regs);
+ UpdateDepthTestEnable(regs);
+ UpdateDepthWriteEnable(regs);
+ UpdateDepthCompareOp(regs);
+ UpdateFrontFace(regs);
+ UpdatePrimitiveTopology(regs);
+ UpdateStencilOp(regs);
+ UpdateStencilTestEnable(regs);
+ }
}
void RasterizerVulkan::BeginTransformFeedback() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
+ if (!device.IsExtTransformFeedbackSupported()) {
+ LOG_ERROR(Render_Vulkan, "Transform feedback used but not supported");
+ return;
+ }
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
@@ -852,60 +977,47 @@ void RasterizerVulkan::BeginTransformFeedback() {
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
const GPUVAddr gpu_addr = binding.Address();
- const std::size_t size = binding.buffer_size;
- const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
+ const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
+ const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
- scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
});
}
void RasterizerVulkan::EndTransformFeedback() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
+ if (!device.IsExtTransformFeedbackSupported()) {
+ return;
+ }
scheduler.Record(
[](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
}
-void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
- BufferBindings& buffer_bindings) {
- const auto& regs = system.GPU().Maxwell3D().regs;
-
- for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
- const auto& attrib = regs.vertex_attrib_format[index];
- if (attrib.IsConstant()) {
- vertex_input.SetAttribute(index, false, 0, 0, {}, {});
- continue;
- }
- vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(),
- attrib.size.Value());
- }
+void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
+ const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
const auto& vertex_array = regs.vertex_array[index];
if (!vertex_array.IsEnabled()) {
- vertex_input.SetBinding(index, false, 0, 0);
continue;
}
- vertex_input.SetBinding(
- index, true, vertex_array.stride,
- regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0);
-
const GPUVAddr start{vertex_array.StartAddress()};
const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
ASSERT(end >= start);
- const std::size_t size{end - start};
+ const std::size_t size = end - start;
if (size == 0) {
- buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
+ buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);
continue;
}
- const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
- buffer_bindings.AddVertexBinding(buffer, offset);
+ const auto info = buffer_cache.UploadMemory(start, size);
+ buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride);
}
}
@@ -914,7 +1026,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
if (params.num_vertices == 0) {
return;
}
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
switch (regs.draw.topology) {
case Maxwell::PrimitiveTopology::Quads: {
if (!params.is_indexed) {
@@ -927,7 +1039,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
break;
}
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
- auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+ const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+ VkBuffer buffer = info.handle;
+ u64 offset = info.offset;
std::tie(buffer, offset) = quad_indexed_pass.Assemble(
regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
@@ -941,7 +1055,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
break;
}
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
- auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+ const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+ VkBuffer buffer = info.handle;
+ u64 offset = info.offset;
auto format = regs.index_array.format;
const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
@@ -958,8 +1074,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
- const auto& gpu = system.GPU().Maxwell3D();
- const auto& shader_stage = gpu.state.shader_stages[stage];
+ const auto& shader_stage = maxwell3d.state.shader_stages[stage];
for (const auto& entry : entries.const_buffers) {
SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
}
@@ -967,8 +1082,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s
void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
- auto& gpu{system.GPU()};
- const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]};
+ const auto& cbufs{maxwell3d.state.shader_stages[stage]};
for (const auto& entry : entries.global_buffers) {
const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
@@ -976,38 +1090,43 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
}
}
-void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) {
+void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().Maxwell3D();
- for (const auto& entry : entries.texel_buffers) {
- const auto image = GetTextureInfo(gpu, entry, stage).tic;
- SetupTexelBuffer(image, entry);
+ for (const auto& entry : entries.uniform_texels) {
+ const auto image = GetTextureInfo(maxwell3d, entry, stage).tic;
+ SetupUniformTexels(image, entry);
}
}
void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.samplers) {
for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(gpu, entry, stage, i);
+ const auto texture = GetTextureInfo(maxwell3d, entry, stage, i);
SetupTexture(texture, entry);
}
}
}
+void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) {
+ MICROPROFILE_SCOPE(Vulkan_Textures);
+ for (const auto& entry : entries.storage_texels) {
+ const auto image = GetTextureInfo(maxwell3d, entry, stage).tic;
+ SetupStorageTexel(image, entry);
+ }
+}
+
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Images);
- const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.images) {
- const auto tic = GetTextureInfo(gpu, entry, stage).tic;
+ const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic;
SetupImage(tic, entry);
}
}
void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const auto& launch_desc = kepler_compute.launch_description;
for (const auto& entry : entries.const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
@@ -1021,38 +1140,43 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
- const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config};
+ const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
for (const auto& entry : entries.global_buffers) {
const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
SetupGlobalBuffer(entry, addr);
}
}
-void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) {
+void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().KeplerCompute();
- for (const auto& entry : entries.texel_buffers) {
- const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
- SetupTexelBuffer(image, entry);
+ for (const auto& entry : entries.uniform_texels) {
+ const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic;
+ SetupUniformTexels(image, entry);
}
}
void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.samplers) {
for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i);
+ const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i);
SetupTexture(texture, entry);
}
}
}
+void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
+ MICROPROFILE_SCOPE(Vulkan_Textures);
+ for (const auto& entry : entries.storage_texels) {
+ const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic;
+ SetupStorageTexel(image, entry);
+ }
+}
+
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Images);
- const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.images) {
- const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
+ const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic;
SetupImage(tic, entry);
}
}
@@ -1070,16 +1194,14 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
ASSERT(size <= MaxConstbufferSize);
- const auto [buffer_handle, offset] =
+ const auto info =
buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
-
- update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
+ update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
}
void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
- auto& memory_manager{system.GPU().MemoryManager()};
- const auto actual_addr = memory_manager.Read<u64>(address);
- const auto size = memory_manager.Read<u32>(address + 8);
+ const u64 actual_addr = gpu_memory.Read<u64>(address);
+ const u32 size = gpu_memory.Read<u32>(address + 8);
if (size == 0) {
// Sometimes global memory pointers don't have a proper size. Upload a dummy entry
@@ -1087,18 +1209,18 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
// Note: Do *not* use DefaultBuffer() here; storage buffers can be written, breaking the
// default buffer.
static constexpr std::size_t dummy_size = 4;
- const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
- update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);
+ const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
+ update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
return;
}
- const auto [buffer, offset] = buffer_cache.UploadMemory(
+ const auto info = buffer_cache.UploadMemory(
actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
- update_descriptor_queue.AddBuffer(buffer, offset, size);
+ update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
}
-void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic,
- const TexelBufferEntry& entry) {
+void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
+ const UniformTexelEntry& entry) {
const auto view = texture_cache.GetTextureSurface(tic, entry);
ASSERT(view->IsBufferView());
@@ -1110,16 +1232,24 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
auto view = texture_cache.GetTextureSurface(texture.tic, entry);
ASSERT(!view->IsBufferView());
- const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source,
- texture.tic.z_source, texture.tic.w_source);
+ const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source,
+ texture.tic.z_source, texture.tic.w_source);
const auto sampler = sampler_cache.GetSampler(texture.tsc);
update_descriptor_queue.AddSampledImage(sampler, image_view);
- const auto image_layout = update_descriptor_queue.GetLastImageLayout();
+ VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
*image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
sampled_views.push_back(ImageView{std::move(view), image_layout});
}
+void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic,
+ const StorageTexelEntry& entry) {
+ const auto view = texture_cache.GetImageSurface(tic, entry);
+ ASSERT(view->IsBufferView());
+
+ update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
+}
+
void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
auto view = texture_cache.GetImageSurface(tic, entry);
@@ -1129,10 +1259,11 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
UNIMPLEMENTED_IF(tic.IsBuffer());
- const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
+ const VkImageView image_view =
+ view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
update_descriptor_queue.AddImage(image_view);
- const auto image_layout = update_descriptor_queue.GetLastImageLayout();
+ VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
*image_layout = VK_IMAGE_LAYOUT_GENERAL;
image_views.push_back(ImageView{std::move(view), image_layout});
}
@@ -1227,6 +1358,117 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
}
}
+void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchCullMode()) {
+ return;
+ }
+ scheduler.Record(
+ [enabled = regs.cull_test_enabled, cull_face = regs.cull_face](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetCullModeEXT(enabled ? MaxwellToVK::CullFace(cull_face) : VK_CULL_MODE_NONE);
+ });
+}
+
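// Illustrative sketch, not verbatim from this change: the Touch* queries used
// by UpdateCullMode and the helpers below live in StateTracker. Assuming a
// dirty-flag bitset like the one read in UpdateAttachments, each query is a
// report-and-clear of one flag:
bool StateTracker::TouchCullMode() {
    const bool is_dirty = flags[Dirty::CullMode];
    // Consume the flag so the dynamic state is only re-recorded when the
    // guest actually changes it.
    flags[Dirty::CullMode] = false;
    return is_dirty;
}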
+void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchDepthBoundsTestEnable()) {
+ return;
+ }
+ scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetDepthBoundsTestEnableEXT(enable);
+ });
+}
+
+void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchDepthTestEnable()) {
+ return;
+ }
+ scheduler.Record([enable = regs.depth_test_enable](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetDepthTestEnableEXT(enable);
+ });
+}
+
+void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchDepthWriteEnable()) {
+ return;
+ }
+ scheduler.Record([enable = regs.depth_write_enabled](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetDepthWriteEnableEXT(enable);
+ });
+}
+
+void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchDepthCompareOp()) {
+ return;
+ }
+ scheduler.Record([func = regs.depth_test_func](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetDepthCompareOpEXT(MaxwellToVK::ComparisonOp(func));
+ });
+}
+
+void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchFrontFace()) {
+ return;
+ }
+
+ VkFrontFace front_face = MaxwellToVK::FrontFace(regs.front_face);
+ if (regs.screen_y_control.triangle_rast_flip != 0) {
+ front_face = front_face == VK_FRONT_FACE_CLOCKWISE ? VK_FRONT_FACE_COUNTER_CLOCKWISE
+ : VK_FRONT_FACE_CLOCKWISE;
+ }
+ scheduler.Record(
+ [front_face](vk::CommandBuffer cmdbuf) { cmdbuf.SetFrontFaceEXT(front_face); });
+}
+
+void RasterizerVulkan::UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs) {
+ const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value();
+ if (!state_tracker.ChangePrimitiveTopology(primitive_topology)) {
+ return;
+ }
+ scheduler.Record([this, primitive_topology](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, primitive_topology));
+ });
+}
+
+void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchStencilOp()) {
+ return;
+ }
+ const Maxwell::StencilOp fail = regs.stencil_front_op_fail;
+ const Maxwell::StencilOp zfail = regs.stencil_front_op_zfail;
+ const Maxwell::StencilOp zpass = regs.stencil_front_op_zpass;
+ const Maxwell::ComparisonOp compare = regs.stencil_front_func_func;
+ if (regs.stencil_two_side_enable) {
+ scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail),
+ MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
+ MaxwellToVK::ComparisonOp(compare));
+ });
+ } else {
+ const Maxwell::StencilOp back_fail = regs.stencil_back_op_fail;
+ const Maxwell::StencilOp back_zfail = regs.stencil_back_op_zfail;
+ const Maxwell::StencilOp back_zpass = regs.stencil_back_op_zpass;
+ const Maxwell::ComparisonOp back_compare = regs.stencil_back_func_func;
+ scheduler.Record([fail, zfail, zpass, compare, back_fail, back_zfail, back_zpass,
+ back_compare](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_BIT, MaxwellToVK::StencilOp(fail),
+ MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
+ MaxwellToVK::ComparisonOp(compare));
+ cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_BACK_BIT, MaxwellToVK::StencilOp(back_fail),
+ MaxwellToVK::StencilOp(back_zpass),
+ MaxwellToVK::StencilOp(back_zfail),
+ MaxwellToVK::ComparisonOp(back_compare));
+ });
+ }
+}
+
+void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchStencilTestEnable()) {
+ return;
+ }
+ scheduler.Record([enable = regs.stencil_enable](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetStencilTestEnableEXT(enable);
+ });
+}
+
std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
std::size_t size = CalculateVertexArraysSize();
if (is_indexed) {
@@ -1242,7 +1484,7 @@ std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
}
std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
std::size_t size = 0;
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
@@ -1257,9 +1499,8 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
}
std::size_t RasterizerVulkan::CalculateIndexBufferSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
- return static_cast<std::size_t>(regs.index_array.count) *
- static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
+ return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
+ static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
}
std::size_t RasterizerVulkan::CalculateConstBufferSize(
@@ -1274,7 +1515,7 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize(
}
RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
RenderPassParams params;
@@ -1304,17 +1545,17 @@ VkBuffer RasterizerVulkan::DefaultBuffer() {
return *default_buffer;
}
- VkBufferCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.size = DEFAULT_BUFFER_SIZE;
- ci.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
- VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
- ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ci.queueFamilyIndexCount = 0;
- ci.pQueueFamilyIndices = nullptr;
- default_buffer = device.GetLogical().CreateBuffer(ci);
+ default_buffer = device.GetLogical().CreateBuffer({
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = DEFAULT_BUFFER_SIZE,
+ .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
+ VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
default_buffer_commit = memory_manager.Commit(default_buffer, false);
scheduler.RequestOutsideRenderPassOperationContext();
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 0ed0e48c6..b47c8fc13 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -25,13 +25,13 @@
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader/async_shaders.h"
namespace Core {
class System;
@@ -105,10 +105,11 @@ struct ImageView {
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public:
- explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
+ explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
VKScreenInfo& screen_info, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
- StateTracker& state_tracker, VKScheduler& scheduler);
+ VKMemoryManager& memory_manager, StateTracker& state_tracker,
+ VKScheduler& scheduler);
~RasterizerVulkan() override;
void Draw(bool is_indexed, bool is_instanced) override;
@@ -134,7 +135,14 @@ public:
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
- void SetupDirtyFlags() override;
+
+ VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
+ return async_shaders;
+ }
+
+ const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
+ return async_shaders;
+ }
/// Maximum supported size that a constbuffer can have in bytes.
static constexpr std::size_t MaxConstbufferSize = 0x10000;
@@ -159,7 +167,10 @@ private:
void FlushWork();
- Texceptions UpdateAttachments();
+ /// @brief Updates the currently bound attachments
+ /// @param is_clear True when the framebuffer is updated as a clear
+ /// @return Bitfield of attachments being used as sampled textures
+ Texceptions UpdateAttachments(bool is_clear);
std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
@@ -168,7 +179,7 @@ private:
bool is_indexed, bool is_instanced);
/// Setup descriptors in the graphics pipeline.
- void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
+ void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
void SetupImageTransitions(Texceptions texceptions,
const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
@@ -182,8 +193,7 @@ private:
bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
- void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
- BufferBindings& buffer_bindings);
+ void SetupVertexArrays(BufferBindings& buffer_bindings);
void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
@@ -193,12 +203,15 @@ private:
/// Setup global buffers in the graphics pipeline.
void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
- /// Setup texel buffers in the graphics pipeline.
- void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage);
+ /// Setup uniform texels in the graphics pipeline.
+ void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
/// Setup textures in the graphics pipeline.
void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
+ /// Setup storage texels in the graphics pipeline.
+ void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
+
/// Setup images in the graphics pipeline.
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
@@ -209,11 +222,14 @@ private:
void SetupComputeGlobalBuffers(const ShaderEntries& entries);
/// Setup uniform texels in the compute pipeline.
- void SetupComputeTexelBuffers(const ShaderEntries& entries);
+ void SetupComputeUniformTexels(const ShaderEntries& entries);
/// Setup textures in the compute pipeline.
void SetupComputeTextures(const ShaderEntries& entries);
+ /// Setup storage texels in the compute pipeline.
+ void SetupComputeStorageTexels(const ShaderEntries& entries);
+
/// Setup images in the compute pipeline.
void SetupComputeImages(const ShaderEntries& entries);
@@ -222,10 +238,12 @@ private:
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
- void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry);
+ void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry);
void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
+ void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry);
+
void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -235,6 +253,16 @@ private:
void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
+
std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
std::size_t CalculateComputeStreamBufferSize() const;
@@ -250,11 +278,13 @@ private:
VkBuffer DefaultBuffer();
- Core::System& system;
- Core::Frontend::EmuWindow& render_window;
+ Tegra::GPU& gpu;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+
VKScreenInfo& screen_info;
const VKDevice& device;
- VKResourceManager& resource_manager;
VKMemoryManager& memory_manager;
StateTracker& state_tracker;
VKScheduler& scheduler;
@@ -271,12 +301,13 @@ private:
VKPipelineCache pipeline_cache;
VKBufferCache buffer_cache;
VKSamplerCache sampler_cache;
- VKFenceManager fence_manager;
VKQueryCache query_cache;
+ VKFenceManager fence_manager;
vk::Buffer default_buffer;
VKMemoryCommit default_buffer_commit;
vk::Event wfi_event;
+ VideoCommon::Shader::AsyncShaders async_shaders;
std::array<View, Maxwell::NumRenderTargets> color_attachments;
View zeta_attachment;
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
index 3f71d005e..80284cf92 100644
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
@@ -39,10 +39,14 @@ VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
using namespace VideoCore::Surface;
+ const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments);
+
std::vector<VkAttachmentDescription> descriptors;
+ descriptors.reserve(num_attachments);
+
std::vector<VkAttachmentReference> color_references;
+ color_references.reserve(num_attachments);
- const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments);
for (std::size_t rt = 0; rt < num_attachments; ++rt) {
const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]);
const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format);
@@ -54,20 +58,22 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0
? VK_IMAGE_LAYOUT_GENERAL
: VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- VkAttachmentDescription& descriptor = descriptors.emplace_back();
- descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT;
- descriptor.format = format.format;
- descriptor.samples = VK_SAMPLE_COUNT_1_BIT;
- descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
- descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
- descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
- descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
- descriptor.initialLayout = color_layout;
- descriptor.finalLayout = color_layout;
-
- VkAttachmentReference& reference = color_references.emplace_back();
- reference.attachment = static_cast<u32>(rt);
- reference.layout = color_layout;
+ descriptors.push_back({
+ .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
+ .format = format.format,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+ .initialLayout = color_layout,
+ .finalLayout = color_layout,
+ });
+
+ color_references.push_back({
+ .attachment = static_cast<u32>(rt),
+ .layout = color_layout,
+ });
}
VkAttachmentReference zeta_attachment_ref;
@@ -82,32 +88,36 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
const VkImageLayout zeta_layout = params.zeta_texception != 0
? VK_IMAGE_LAYOUT_GENERAL
: VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
- VkAttachmentDescription& descriptor = descriptors.emplace_back();
- descriptor.flags = 0;
- descriptor.format = format.format;
- descriptor.samples = VK_SAMPLE_COUNT_1_BIT;
- descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
- descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
- descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
- descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
- descriptor.initialLayout = zeta_layout;
- descriptor.finalLayout = zeta_layout;
-
- zeta_attachment_ref.attachment = static_cast<u32>(num_attachments);
- zeta_attachment_ref.layout = zeta_layout;
+ descriptors.push_back({
+ .flags = 0,
+ .format = format.format,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = zeta_layout,
+ .finalLayout = zeta_layout,
+ });
+
+ zeta_attachment_ref = {
+ .attachment = static_cast<u32>(num_attachments),
+ .layout = zeta_layout,
+ };
}
- VkSubpassDescription subpass_description;
- subpass_description.flags = 0;
- subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
- subpass_description.inputAttachmentCount = 0;
- subpass_description.pInputAttachments = nullptr;
- subpass_description.colorAttachmentCount = static_cast<u32>(color_references.size());
- subpass_description.pColorAttachments = color_references.data();
- subpass_description.pResolveAttachments = nullptr;
- subpass_description.pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr;
- subpass_description.preserveAttachmentCount = 0;
- subpass_description.pPreserveAttachments = nullptr;
+ const VkSubpassDescription subpass_description{
+ .flags = 0,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .pInputAttachments = nullptr,
+ .colorAttachmentCount = static_cast<u32>(color_references.size()),
+ .pColorAttachments = color_references.data(),
+ .pResolveAttachments = nullptr,
+ .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr,
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = nullptr,
+ };
VkAccessFlags access = 0;
VkPipelineStageFlags stage = 0;
@@ -122,26 +132,27 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
}
- VkSubpassDependency subpass_dependency;
- subpass_dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
- subpass_dependency.dstSubpass = 0;
- subpass_dependency.srcStageMask = stage;
- subpass_dependency.dstStageMask = stage;
- subpass_dependency.srcAccessMask = 0;
- subpass_dependency.dstAccessMask = access;
- subpass_dependency.dependencyFlags = 0;
-
- VkRenderPassCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.attachmentCount = static_cast<u32>(descriptors.size());
- ci.pAttachments = descriptors.data();
- ci.subpassCount = 1;
- ci.pSubpasses = &subpass_description;
- ci.dependencyCount = 1;
- ci.pDependencies = &subpass_dependency;
- return device.GetLogical().CreateRenderPass(ci);
+ const VkSubpassDependency subpass_dependency{
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = stage,
+ .dstStageMask = stage,
+ .srcAccessMask = 0,
+ .dstAccessMask = access,
+ .dependencyFlags = 0,
+ };
+
+ return device.GetLogical().CreateRenderPass({
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .attachmentCount = static_cast<u32>(descriptors.size()),
+ .pAttachments = descriptors.data(),
+ .subpassCount = 1,
+ .pSubpasses = &subpass_description,
+ .dependencyCount = 1,
+ .pDependencies = &subpass_dependency,
+ });
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
deleted file mode 100644
index dc06f545a..000000000
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ /dev/null
@@ -1,312 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <optional>
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-
-namespace Vulkan {
-
-namespace {
-
-// TODO(Rodrigo): Fine tune these numbers.
-constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
-constexpr std::size_t FENCES_GROW_STEP = 0x40;
-
-VkFenceCreateInfo BuildFenceCreateInfo() {
- VkFenceCreateInfo fence_ci;
- fence_ci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
- fence_ci.pNext = nullptr;
- fence_ci.flags = 0;
- return fence_ci;
-}
-
-} // Anonymous namespace
-
-class CommandBufferPool final : public VKFencedPool {
-public:
- CommandBufferPool(const VKDevice& device)
- : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
-
- void Allocate(std::size_t begin, std::size_t end) override {
- // Command buffers are going to be committed, recorded, and executed every single usage cycle.
- // They are also going to be reset when committed.
- VkCommandPoolCreateInfo command_pool_ci;
- command_pool_ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
- command_pool_ci.pNext = nullptr;
- command_pool_ci.flags =
- VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
- command_pool_ci.queueFamilyIndex = device.GetGraphicsFamily();
-
- Pool& pool = pools.emplace_back();
- pool.handle = device.GetLogical().CreateCommandPool(command_pool_ci);
- pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE);
- }
-
- VkCommandBuffer Commit(VKFence& fence) {
- const std::size_t index = CommitResource(fence);
- const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
- const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
- return pools[pool_index].cmdbufs[sub_index];
- }
-
-private:
- struct Pool {
- vk::CommandPool handle;
- vk::CommandBuffers cmdbufs;
- };
-
- const VKDevice& device;
- std::vector<Pool> pools;
-};
-
-VKResource::VKResource() = default;
-
-VKResource::~VKResource() = default;
-
-VKFence::VKFence(const VKDevice& device)
- : device{device}, handle{device.GetLogical().CreateFence(BuildFenceCreateInfo())} {}
-
-VKFence::~VKFence() = default;
-
-void VKFence::Wait() {
- switch (const VkResult result = handle.Wait()) {
- case VK_SUCCESS:
- return;
- case VK_ERROR_DEVICE_LOST:
- device.ReportLoss();
- [[fallthrough]];
- default:
- throw vk::Exception(result);
- }
-}
-
-void VKFence::Release() {
- ASSERT(is_owned);
- is_owned = false;
-}
-
-void VKFence::Commit() {
- is_owned = true;
- is_used = true;
-}
-
-bool VKFence::Tick(bool gpu_wait, bool owner_wait) {
- if (!is_used) {
- // If a fence is not used it's always free.
- return true;
- }
- if (is_owned && !owner_wait) {
- // The fence is still being owned (Release has not been called) and ownership wait has
- // not been asked.
- return false;
- }
-
- if (gpu_wait) {
- // Wait for the fence if it has been requested.
- (void)handle.Wait();
- } else {
- if (handle.GetStatus() != VK_SUCCESS) {
- // Vulkan fence is not ready; not much we can do here
- return false;
- }
- }
-
- // Broadcast resources their free state.
- for (auto* resource : protected_resources) {
- resource->OnFenceRemoval(this);
- }
- protected_resources.clear();
-
- // Prepare fence for reusage.
- handle.Reset();
- is_used = false;
- return true;
-}
-
-void VKFence::Protect(VKResource* resource) {
- protected_resources.push_back(resource);
-}
-
-void VKFence::Unprotect(VKResource* resource) {
- const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
- ASSERT(it != protected_resources.end());
-
- resource->OnFenceRemoval(this);
- protected_resources.erase(it);
-}
-
-void VKFence::RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept {
- std::replace(std::begin(protected_resources), std::end(protected_resources), old_resource,
- new_resource);
-}
-
-VKFenceWatch::VKFenceWatch() = default;
-
-VKFenceWatch::VKFenceWatch(VKFence& initial_fence) {
- Watch(initial_fence);
-}
-
-VKFenceWatch::VKFenceWatch(VKFenceWatch&& rhs) noexcept {
- fence = std::exchange(rhs.fence, nullptr);
- if (fence) {
- fence->RedirectProtection(&rhs, this);
- }
-}
-
-VKFenceWatch& VKFenceWatch::operator=(VKFenceWatch&& rhs) noexcept {
- fence = std::exchange(rhs.fence, nullptr);
- if (fence) {
- fence->RedirectProtection(&rhs, this);
- }
- return *this;
-}
-
-VKFenceWatch::~VKFenceWatch() {
- if (fence) {
- fence->Unprotect(this);
- }
-}
-
-void VKFenceWatch::Wait() {
- if (fence == nullptr) {
- return;
- }
- fence->Wait();
- fence->Unprotect(this);
-}
-
-void VKFenceWatch::Watch(VKFence& new_fence) {
- Wait();
- fence = &new_fence;
- fence->Protect(this);
-}
-
-bool VKFenceWatch::TryWatch(VKFence& new_fence) {
- if (fence) {
- return false;
- }
- fence = &new_fence;
- fence->Protect(this);
- return true;
-}
-
-void VKFenceWatch::OnFenceRemoval(VKFence* signaling_fence) {
- ASSERT_MSG(signaling_fence == fence, "Removing the wrong fence");
- fence = nullptr;
-}
-
-VKFencedPool::VKFencedPool(std::size_t grow_step) : grow_step{grow_step} {}
-
-VKFencedPool::~VKFencedPool() = default;
-
-std::size_t VKFencedPool::CommitResource(VKFence& fence) {
- const auto Search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> {
- for (std::size_t iterator = begin; iterator < end; ++iterator) {
- if (watches[iterator]->TryWatch(fence)) {
- // The resource is now being watched, a free resource was successfully found.
- return iterator;
- }
- }
- return {};
- };
- // Try to find a free resource from the hinted position to the end.
- auto found = Search(free_iterator, watches.size());
- if (!found) {
- // Search from beginning to the hinted position.
- found = Search(0, free_iterator);
- if (!found) {
- // Both searches failed, the pool is full; handle it.
- const std::size_t free_resource = ManageOverflow();
-
- // Watch will wait for the resource to be free.
- watches[free_resource]->Watch(fence);
- found = free_resource;
- }
- }
- // Free iterator is hinted to the resource after the one that's been committed.
- free_iterator = (*found + 1) % watches.size();
- return *found;
-}
-
-std::size_t VKFencedPool::ManageOverflow() {
- const std::size_t old_capacity = watches.size();
- Grow();
-
- // The last entry is guaranteed to be free, since it's the first element of the freshly
- // allocated resources.
- return old_capacity;
-}
-
-void VKFencedPool::Grow() {
- const std::size_t old_capacity = watches.size();
- watches.resize(old_capacity + grow_step);
- std::generate(watches.begin() + old_capacity, watches.end(),
- []() { return std::make_unique<VKFenceWatch>(); });
- Allocate(old_capacity, old_capacity + grow_step);
-}
-
-VKResourceManager::VKResourceManager(const VKDevice& device) : device{device} {
- GrowFences(FENCES_GROW_STEP);
- command_buffer_pool = std::make_unique<CommandBufferPool>(device);
-}
-
-VKResourceManager::~VKResourceManager() = default;
-
-VKFence& VKResourceManager::CommitFence() {
- const auto StepFences = [&](bool gpu_wait, bool owner_wait) -> VKFence* {
- const auto Tick = [=](auto& fence) { return fence->Tick(gpu_wait, owner_wait); };
- const auto hinted = fences.begin() + fences_iterator;
-
- auto it = std::find_if(hinted, fences.end(), Tick);
- if (it == fences.end()) {
- it = std::find_if(fences.begin(), hinted, Tick);
- if (it == hinted) {
- return nullptr;
- }
- }
- fences_iterator = std::distance(fences.begin(), it) + 1;
- if (fences_iterator >= fences.size())
- fences_iterator = 0;
-
- auto& fence = *it;
- fence->Commit();
- return fence.get();
- };
-
- VKFence* found_fence = StepFences(false, false);
- if (!found_fence) {
- // Try again, this time waiting.
- found_fence = StepFences(true, false);
-
- if (!found_fence) {
- // Allocate new fences and try again.
- LOG_INFO(Render_Vulkan, "Allocating new fences {} -> {}", fences.size(),
- fences.size() + FENCES_GROW_STEP);
-
- GrowFences(FENCES_GROW_STEP);
- found_fence = StepFences(true, false);
- ASSERT(found_fence != nullptr);
- }
- }
- return *found_fence;
-}
-
-VkCommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) {
- return command_buffer_pool->Commit(fence);
-}
-
-void VKResourceManager::GrowFences(std::size_t new_fences_count) {
- const std::size_t previous_size = fences.size();
- fences.resize(previous_size + new_fences_count);
-
- std::generate(fences.begin() + previous_size, fences.end(),
- [this] { return std::make_unique<VKFence>(device); });
-}
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
deleted file mode 100644
index f683d2276..000000000
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ /dev/null
@@ -1,196 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <cstddef>
-#include <memory>
-#include <vector>
-#include "video_core/renderer_vulkan/wrapper.h"
-
-namespace Vulkan {
-
-class VKDevice;
-class VKFence;
-class VKResourceManager;
-
-class CommandBufferPool;
-
-/// Interface for a Vulkan resource
-class VKResource {
-public:
- explicit VKResource();
- virtual ~VKResource();
-
- /**
- * Signals the object that an owning fence has been signaled.
- * @param signaling_fence Fence that signals its usage end.
- */
- virtual void OnFenceRemoval(VKFence* signaling_fence) = 0;
-};
-
-/**
- * Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent access.
- * They must be committed from the resource manager. The usage flow is: commit the fence from the
- * resource manager, protect resources with it and use them, send the fence to an execution queue,
- * Wait for it if needed, and then call Release. Used resources will automatically be signaled
- * when they are free to be reused.
- * @brief Protects resources for concurrent usage and signals its release.
- */
-class VKFence {
- friend class VKResourceManager;
-
-public:
- explicit VKFence(const VKDevice& device);
- ~VKFence();
-
- /**
- * Waits for the fence to be signaled.
- * @warning You must have ownership of the fence and it has to be previously sent to a queue to
- * call this function.
- */
- void Wait();
-
- /**
- * Releases ownership of the fence. Call this after it has been sent to an execution queue.
- * Unmanaged usage of the fence after this call results in undefined behavior because it may
- * already be in use elsewhere.
- */
- void Release();
-
- /// Protects a resource with this fence.
- void Protect(VKResource* resource);
-
- /// Removes protection for a resource.
- void Unprotect(VKResource* resource);
-
- /// Redirects one protected resource to a new address.
- void RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept;
-
- /// Retrieves the fence.
- operator VkFence() const {
- return *handle;
- }
-
-private:
- /// Take ownership of the fence.
- void Commit();
-
- /**
- * Updates the fence status.
- * @warning Waiting for the owner might soft lock the execution.
- * @param gpu_wait Wait for the fence to be signaled by the driver.
- * @param owner_wait Wait for the owner to signal its freedom.
- * @returns True if the fence is free. Waiting for gpu and owner will always return true.
- */
- bool Tick(bool gpu_wait, bool owner_wait);
-
- const VKDevice& device; ///< Device handler
- vk::Fence handle; ///< Vulkan fence
- std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence
- bool is_owned = false; ///< The fence has been committed but not released yet.
- bool is_used = false; ///< The fence has been committed but not yet checked to be free.
-};
-
-/**
- * A fence watch is used to keep track of the usage of a fence and protect a resource or set of
- * resources without having to inherit VKResource from their handlers.
- */
-class VKFenceWatch final : public VKResource {
-public:
- explicit VKFenceWatch();
- VKFenceWatch(VKFence& initial_fence);
- VKFenceWatch(VKFenceWatch&&) noexcept;
- VKFenceWatch(const VKFenceWatch&) = delete;
- ~VKFenceWatch() override;
-
- VKFenceWatch& operator=(VKFenceWatch&&) noexcept;
-
- /// Waits for the fence to be released.
- void Wait();
-
- /**
- * Waits for a previous fence and watches a new one.
- * @param new_fence New fence to watch.
- */
- void Watch(VKFence& new_fence);
-
- /**
- * Starts watching the given fence if this watch is currently free.
- * @returns True if the watch started, false if the watch is already in use.
- */
- bool TryWatch(VKFence& new_fence);
-
- void OnFenceRemoval(VKFence* signaling_fence) override;
-
- /**
- * Do not use it paired with Watch. Use TryWatch instead.
- * Returns true while the watch is in use.
- */
- bool IsUsed() const {
- return fence != nullptr;
- }
-
-private:
- VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free.
-};
-
-/**
- * Handles a pool of resources protected by fences. Manages resource overflow by allocating
- * more resources.
- */
-class VKFencedPool {
-public:
- explicit VKFencedPool(std::size_t grow_step);
- virtual ~VKFencedPool();
-
-protected:
- /**
- * Commits a free resource and protects it with a fence. It may allocate new resources.
- * @param fence Fence that protects the committed resource.
- * @returns Index of the committed resource.
- */
- std::size_t CommitResource(VKFence& fence);
-
- /// Called when a chunk of resources has to be allocated.
- virtual void Allocate(std::size_t begin, std::size_t end) = 0;
-
-private:
- /// Manages pool overflow allocating new resources.
- std::size_t ManageOverflow();
-
- /// Allocates a new page of resources.
- void Grow();
-
- std::size_t grow_step = 0; ///< Number of new resources created after an overflow
- std::size_t free_iterator = 0; ///< Hint to where the next free resource is likely to be found
- std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Set of watched resources
-};
-
-/**
- * The resource manager handles all resources that can be protected with a fence avoiding
- * driver-side or GPU-side concurrent usage. Usage is documented in VKFence.
- */
-class VKResourceManager final {
-public:
- explicit VKResourceManager(const VKDevice& device);
- ~VKResourceManager();
-
- /// Commits a fence. It has to be sent to a queue and released.
- VKFence& CommitFence();
-
- /// Commits an unused command buffer and protects it with a fence.
- VkCommandBuffer CommitCommandBuffer(VKFence& fence);
-
-private:
- /// Allocates new fences.
- void GrowFences(std::size_t new_fences_count);
-
- const VKDevice& device; ///< Device handler.
- std::size_t fences_iterator = 0; ///< Index where a free fence is likely to be found.
- std::vector<std::unique_ptr<VKFence>> fences; ///< Pool of fences.
- std::unique_ptr<CommandBufferPool> command_buffer_pool; ///< Pool of command buffers.
-};
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
new file mode 100644
index 000000000..ee274ac59
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -0,0 +1,63 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
+
+namespace Vulkan {
+
+ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
+ : master_semaphore{master_semaphore_}, grow_step{grow_step_} {}
+
+ResourcePool::~ResourcePool() = default;
+
+size_t ResourcePool::CommitResource() {
+ // Refresh semaphore to query updated results
+ master_semaphore.Refresh();
+
+ const auto search = [this](size_t begin, size_t end) -> std::optional<size_t> {
+ for (size_t iterator = begin; iterator < end; ++iterator) {
+ if (master_semaphore.IsFree(ticks[iterator])) {
+ ticks[iterator] = master_semaphore.CurrentTick();
+ return iterator;
+ }
+ }
+ return {};
+ };
+ // Try to find a free resource from the hinted position to the end.
+ auto found = search(free_iterator, ticks.size());
+ if (!found) {
+ // Search from beginning to the hinted position.
+ found = search(0, free_iterator);
+ if (!found) {
+ // Both searches failed, the pool is full; handle it.
+ const size_t free_resource = ManageOverflow();
+
+ ticks[free_resource] = master_semaphore.CurrentTick();
+ found = free_resource;
+ }
+ }
+ // Free iterator is hinted to the resource after the one that's been committed.
+ free_iterator = (*found + 1) % ticks.size();
+ return *found;
+}
+
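// Illustrative sketch, not verbatim from this change: the MasterSemaphore
// interface relied on above is declared in vk_master_semaphore.h, not shown
// in this section. Conceptually it wraps a timeline counter; a resource
// stamped with tick T becomes reusable once the GPU is known to have passed T:
bool MasterSemaphore::IsFree(u64 tick) const {
    // gpu_tick caches the last counter value observed from the device.
    return gpu_tick >= tick;
}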
+size_t ResourcePool::ManageOverflow() {
+ const size_t old_capacity = ticks.size();
+ Grow();
+
+ // The last entry is guaranteed to be free, since it's the first element of the freshly
+ // allocated resources.
+ return old_capacity;
+}
+
+void ResourcePool::Grow() {
+ const size_t old_capacity = ticks.size();
+ ticks.resize(old_capacity + grow_step);
+ Allocate(old_capacity, old_capacity + grow_step);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
new file mode 100644
index 000000000..a018c7ec2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -0,0 +1,43 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace Vulkan {
+
+class MasterSemaphore;
+
+/**
+ * Handles a pool of resources protected by the master semaphore's timeline ticks. Manages
+ * resource overflow by allocating more resources.
+ */
+class ResourcePool {
+public:
+ explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
+ virtual ~ResourcePool();
+
+protected:
+ size_t CommitResource();
+
+ /// Called when a chunk of resources has to be allocated.
+ virtual void Allocate(size_t begin, size_t end) = 0;
+
+private:
+ /// Manages pool overflow allocating new resources.
+ size_t ManageOverflow();
+
+ /// Allocates a new page of resources.
+ void Grow();
+
+ MasterSemaphore& master_semaphore;
+ size_t grow_step = 0; ///< Number of new resources created after an overflow
+    size_t free_iterator = 0; ///< Hint to where the next free resource is likely to be found
+ std::vector<u64> ticks; ///< Ticks for each resource
+};
+
+} // namespace Vulkan
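
A subclass provides the backing objects: Allocate(begin, end) materializes entries for a freshly grown index range, while the protected CommitResource() hands back an index whose previous use the GPU has already completed. A minimal sketch of a derived pool, with hypothetical names, to illustrate the contract:

    // Sketch only; DescriptorAllocator and its members are illustrative, not part of this patch.
    class DescriptorAllocator final : public ResourcePool {
    public:
        explicit DescriptorAllocator(MasterSemaphore& semaphore)
            : ResourcePool(semaphore, 64) {} // grow 64 entries at a time

        VkDescriptorSet Commit() {
            // Reuses an entry whose tick the GPU already passed, growing the pool on overflow.
            return sets[CommitResource()];
        }

    private:
        void Allocate(size_t begin, size_t end) override {
            sets.resize(end);
            // ... allocate descriptor sets for indices [begin, end) ...
        }

        std::vector<VkDescriptorSet> sets;
    };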
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index e6f2fa553..b068888f9 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -9,6 +9,8 @@
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/textures/texture.h"
+using Tegra::Texture::TextureMipmapFilter;
+
namespace Vulkan {
namespace {
@@ -42,32 +44,36 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c
const bool arbitrary_borders = device.IsExtCustomBorderColorSupported();
const std::array color = tsc.GetBorderColor();
- VkSamplerCustomBorderColorCreateInfoEXT border;
- border.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT;
- border.pNext = nullptr;
- border.format = VK_FORMAT_UNDEFINED;
+ VkSamplerCustomBorderColorCreateInfoEXT border{
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .customBorderColor = {},
+ .format = VK_FORMAT_UNDEFINED,
+ };
std::memcpy(&border.customBorderColor, color.data(), sizeof(color));
- VkSamplerCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
- ci.pNext = arbitrary_borders ? &border : nullptr;
- ci.flags = 0;
- ci.magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter);
- ci.minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter);
- ci.mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter);
- ci.addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter);
- ci.addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter);
- ci.addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter);
- ci.mipLodBias = tsc.GetLodBias();
- ci.anisotropyEnable = tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE;
- ci.maxAnisotropy = tsc.GetMaxAnisotropy();
- ci.compareEnable = tsc.depth_compare_enabled;
- ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func);
- ci.minLod = tsc.GetMinLod();
- ci.maxLod = tsc.GetMaxLod();
- ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color);
- ci.unnormalizedCoordinates = VK_FALSE;
- return device.GetLogical().CreateSampler(ci);
+ return device.GetLogical().CreateSampler({
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .pNext = arbitrary_borders ? &border : nullptr,
+ .flags = 0,
+ .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
+ .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
+ .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
+ .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
+ .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
+ .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
+ .mipLodBias = tsc.GetLodBias(),
+ .anisotropyEnable =
+ static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE),
+ .maxAnisotropy = tsc.GetMaxAnisotropy(),
+ .compareEnable = tsc.depth_compare_enabled,
+ .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
+ .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(),
+ .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(),
+ .borderColor =
+ arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color),
+ .unnormalizedCoordinates = VK_FALSE,
+ });
}
VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const {
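
The minLod/maxLod special case above is the Vulkan specification's recipe for emulating a non-mipmapped sampler: combined with a nearest mipmap mode, clamping the level of detail to [0.0, 0.25] guarantees that only the base level is ever selected, which matches a TSC entry whose mipmap filter is None. In short:

    // Rule applied in CreateSampler (values taken from the patch):
    //   mipmap_filter == None -> minLod = 0.0f, maxLod = 0.25f (base level only)
    //   otherwise             -> minLod = tsc.GetMinLod(), maxLod = tsc.GetMaxLod()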
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 82ec9180e..1a483dc71 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -9,9 +9,11 @@
#include <utility>
#include "common/microprofile.h"
+#include "common/thread.h"
+#include "video_core/renderer_vulkan/vk_command_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/wrapper.h"
@@ -34,10 +36,10 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
last = nullptr;
}
-VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
- StateTracker& state_tracker)
- : device{device}, resource_manager{resource_manager}, state_tracker{state_tracker},
- next_fence{&resource_manager.CommitFence()} {
+VKScheduler::VKScheduler(const VKDevice& device_, StateTracker& state_tracker_)
+ : device{device_}, state_tracker{state_tracker_},
+ master_semaphore{std::make_unique<MasterSemaphore>(device)},
+ command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
AcquireNewChunk();
AllocateNewContext();
worker_thread = std::thread(&VKScheduler::WorkerThread, this);
@@ -49,20 +51,27 @@ VKScheduler::~VKScheduler() {
worker_thread.join();
}
-void VKScheduler::Flush(bool release_fence, VkSemaphore semaphore) {
+u64 VKScheduler::CurrentTick() const noexcept {
+ return master_semaphore->CurrentTick();
+}
+
+bool VKScheduler::IsFree(u64 tick) const noexcept {
+ return master_semaphore->IsFree(tick);
+}
+
+void VKScheduler::Wait(u64 tick) {
+ master_semaphore->Wait(tick);
+}
+
+void VKScheduler::Flush(VkSemaphore semaphore) {
SubmitExecution(semaphore);
- if (release_fence) {
- current_fence->Release();
- }
AllocateNewContext();
}
-void VKScheduler::Finish(bool release_fence, VkSemaphore semaphore) {
+void VKScheduler::Finish(VkSemaphore semaphore) {
+ const u64 presubmit_tick = CurrentTick();
SubmitExecution(semaphore);
- current_fence->Wait();
- if (release_fence) {
- current_fence->Release();
- }
+ Wait(presubmit_tick);
AllocateNewContext();
}
@@ -99,16 +108,19 @@ void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer frame
state.framebuffer = framebuffer;
state.render_area = render_area;
- VkRenderPassBeginInfo renderpass_bi;
- renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
- renderpass_bi.pNext = nullptr;
- renderpass_bi.renderPass = renderpass;
- renderpass_bi.framebuffer = framebuffer;
- renderpass_bi.renderArea.offset.x = 0;
- renderpass_bi.renderArea.offset.y = 0;
- renderpass_bi.renderArea.extent = render_area;
- renderpass_bi.clearValueCount = 0;
- renderpass_bi.pClearValues = nullptr;
+ const VkRenderPassBeginInfo renderpass_bi{
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .pNext = nullptr,
+ .renderPass = renderpass,
+ .framebuffer = framebuffer,
+ .renderArea =
+ {
+ .offset = {.x = 0, .y = 0},
+ .extent = render_area,
+ },
+ .clearValueCount = 0,
+ .pClearValues = nullptr,
+ };
Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) {
if (end_renderpass) {
@@ -133,6 +145,7 @@ void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) {
}
void VKScheduler::WorkerThread() {
+ Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
std::unique_lock lock{mutex};
do {
cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
@@ -155,17 +168,38 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
current_cmdbuf.End();
- VkSubmitInfo submit_info;
- submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
- submit_info.pNext = nullptr;
- submit_info.waitSemaphoreCount = 0;
- submit_info.pWaitSemaphores = nullptr;
- submit_info.pWaitDstStageMask = nullptr;
- submit_info.commandBufferCount = 1;
- submit_info.pCommandBuffers = current_cmdbuf.address();
- submit_info.signalSemaphoreCount = semaphore ? 1 : 0;
- submit_info.pSignalSemaphores = &semaphore;
- switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) {
+ const VkSemaphore timeline_semaphore = master_semaphore->Handle();
+ const u32 num_signal_semaphores = semaphore ? 2U : 1U;
+
+ const u64 signal_value = master_semaphore->CurrentTick();
+ const u64 wait_value = signal_value - 1;
+ const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+
+ master_semaphore->NextTick();
+
+ const std::array signal_values{signal_value, u64(0)};
+ const std::array signal_semaphores{timeline_semaphore, semaphore};
+
+ const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
+ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
+ .pNext = nullptr,
+ .waitSemaphoreValueCount = 1,
+ .pWaitSemaphoreValues = &wait_value,
+ .signalSemaphoreValueCount = num_signal_semaphores,
+ .pSignalSemaphoreValues = signal_values.data(),
+ };
+ const VkSubmitInfo submit_info{
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .pNext = &timeline_si,
+ .waitSemaphoreCount = 1,
+ .pWaitSemaphores = &timeline_semaphore,
+ .pWaitDstStageMask = &wait_stage_mask,
+ .commandBufferCount = 1,
+ .pCommandBuffers = current_cmdbuf.address(),
+ .signalSemaphoreCount = num_signal_semaphores,
+ .pSignalSemaphores = signal_semaphores.data(),
+ };
+ switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:
@@ -177,21 +211,15 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
}
void VKScheduler::AllocateNewContext() {
- ++ticks;
-
- VkCommandBufferBeginInfo cmdbuf_bi;
- cmdbuf_bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
- cmdbuf_bi.pNext = nullptr;
- cmdbuf_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
- cmdbuf_bi.pInheritanceInfo = nullptr;
-
std::unique_lock lock{mutex};
- current_fence = next_fence;
- next_fence = &resource_manager.CommitFence();
- current_cmdbuf = vk::CommandBuffer(resource_manager.CommitCommandBuffer(*current_fence),
- device.GetDispatchLoader());
- current_cmdbuf.Begin(cmdbuf_bi);
+ current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
+ current_cmdbuf.Begin({
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .pNext = nullptr,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ .pInheritanceInfo = nullptr,
+ });
// Enable counters once again. These are disabled when a command buffer is finished.
if (query_cache) {
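
Every submission now signals the next tick on a single timeline semaphore and waits on the previous one, so command buffers retire strictly in order and any resource can be recycled by comparing its recorded tick against the semaphore's counter. A rough sketch of the operations MasterSemaphore is assumed to wrap, built on the VK_KHR_timeline_semaphore entry points (member names are illustrative, not the actual implementation):

    class MasterSemaphoreSketch {
    public:
        u64 CurrentTick() const noexcept { return current_tick; }
        void NextTick() noexcept { ++current_tick; }

        bool IsFree(u64 tick) const {
            u64 gpu_tick = 0;
            vkGetSemaphoreCounterValueKHR(device, semaphore, &gpu_tick);
            return gpu_tick >= tick;
        }

        void Wait(u64 tick) const {
            const VkSemaphoreWaitInfoKHR wait_info{
                .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR,
                .pNext = nullptr,
                .flags = 0,
                .semaphoreCount = 1,
                .pSemaphores = &semaphore,
                .pValues = &tick,
            };
            vkWaitSemaphoresKHR(device, &wait_info, UINT64_MAX);
        }

    private:
        VkDevice device = VK_NULL_HANDLE;
        VkSemaphore semaphore = VK_NULL_HANDLE; // VK_SEMAPHORE_TYPE_TIMELINE_KHR
        u64 current_tick = 1;                   // the semaphore starts signaled at 0
    };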
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 970a65566..7be8a19f0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -16,42 +16,33 @@
namespace Vulkan {
+class CommandPool;
+class MasterSemaphore;
class StateTracker;
class VKDevice;
-class VKFence;
class VKQueryCache;
-class VKResourceManager;
-
-class VKFenceView {
-public:
- VKFenceView() = default;
- VKFenceView(VKFence* const& fence) : fence{fence} {}
-
- VKFence* operator->() const noexcept {
- return fence;
- }
-
- operator VKFence&() const noexcept {
- return *fence;
- }
-
-private:
- VKFence* const& fence;
-};
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class VKScheduler {
public:
- explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
- StateTracker& state_tracker);
+ explicit VKScheduler(const VKDevice& device, StateTracker& state_tracker);
~VKScheduler();
+ /// Returns the current command buffer tick.
+ [[nodiscard]] u64 CurrentTick() const noexcept;
+
+ /// Returns true when a tick has been triggered by the GPU.
+ [[nodiscard]] bool IsFree(u64 tick) const noexcept;
+
+ /// Waits for the given tick to trigger on the GPU.
+ void Wait(u64 tick);
+
/// Sends the current execution context to the GPU.
- void Flush(bool release_fence = true, VkSemaphore semaphore = nullptr);
+ void Flush(VkSemaphore semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
- void Finish(bool release_fence = true, VkSemaphore semaphore = nullptr);
+ void Finish(VkSemaphore semaphore = nullptr);
/// Waits for the worker thread to finish executing everything. After this function returns it's
/// safe to touch worker resources.
@@ -86,14 +77,9 @@ public:
(void)chunk->Record(command);
}
- /// Gets a reference to the current fence.
- VKFenceView GetFence() const {
- return current_fence;
- }
-
- /// Returns the current command buffer tick.
- u64 Ticks() const {
- return ticks;
+ /// Returns the master timeline semaphore.
+ [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
+ return *master_semaphore;
}
private:
@@ -171,6 +157,13 @@ private:
std::array<u8, 0x8000> data{};
};
+ struct State {
+ VkRenderPass renderpass = nullptr;
+ VkFramebuffer framebuffer = nullptr;
+ VkExtent2D render_area = {0, 0};
+ VkPipeline graphics_pipeline = nullptr;
+ };
+
void WorkerThread();
void SubmitExecution(VkSemaphore semaphore);
@@ -186,30 +179,23 @@ private:
void AcquireNewChunk();
const VKDevice& device;
- VKResourceManager& resource_manager;
StateTracker& state_tracker;
+ std::unique_ptr<MasterSemaphore> master_semaphore;
+ std::unique_ptr<CommandPool> command_pool;
+
VKQueryCache* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf;
- VKFence* current_fence = nullptr;
- VKFence* next_fence = nullptr;
-
- struct State {
- VkRenderPass renderpass = nullptr;
- VkFramebuffer framebuffer = nullptr;
- VkExtent2D render_area = {0, 0};
- VkPipeline graphics_pipeline = nullptr;
- } state;
std::unique_ptr<CommandChunk> chunk;
std::thread worker_thread;
+ State state;
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
std::mutex mutex;
std::condition_variable cv;
- std::atomic<u64> ticks = 0;
bool quit = false;
};
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index a13e8baa7..cd7d7a4e4 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -400,8 +400,9 @@ private:
u32 binding = specialization.base_binding;
binding = DeclareConstantBuffers(binding);
binding = DeclareGlobalBuffers(binding);
- binding = DeclareTexelBuffers(binding);
+ binding = DeclareUniformTexels(binding);
binding = DeclareSamplers(binding);
+ binding = DeclareStorageTexels(binding);
binding = DeclareImages(binding);
const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
@@ -684,13 +685,19 @@ private:
}
t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint);
- const u32 smem_size = specialization.shared_memory_size;
+ u32 smem_size = specialization.shared_memory_size * 4;
if (smem_size == 0) {
// Avoid declaring an empty array.
return;
}
- const auto element_count = static_cast<u32>(Common::AlignUp(smem_size, 4) / 4);
- const Id type_array = TypeArray(t_uint, Constant(t_uint, element_count));
+ const u32 limit = device.GetMaxComputeSharedMemorySize();
+ if (smem_size > limit) {
+ LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}",
+ smem_size, limit);
+ smem_size = limit;
+ }
+
+ const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4));
const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array);
Name(type_pointer, "SharedMemory");
@@ -699,9 +706,9 @@ private:
}
void DeclareInternalFlags() {
- constexpr std::array names = {"zero", "sign", "carry", "overflow"};
+ static constexpr std::array names{"zero", "sign", "carry", "overflow"};
+
for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
- const auto flag_code = static_cast<InternalFlag>(flag);
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
}
@@ -889,7 +896,7 @@ private:
return binding;
}
- u32 DeclareTexelBuffers(u32 binding) {
+ u32 DeclareUniformTexels(u32 binding) {
for (const auto& sampler : ir.GetSamplers()) {
if (!sampler.is_buffer) {
continue;
@@ -910,7 +917,7 @@ private:
Decorate(id, spv::Decoration::Binding, binding++);
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
- texel_buffers.emplace(sampler.index, TexelBuffer{image_type, id});
+ uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id});
}
return binding;
}
@@ -945,31 +952,48 @@ private:
return binding;
}
- u32 DeclareImages(u32 binding) {
+ u32 DeclareStorageTexels(u32 binding) {
for (const auto& image : ir.GetImages()) {
- const auto [dim, arrayed] = GetImageDim(image);
- constexpr int depth = 0;
- constexpr bool ms = false;
- constexpr int sampled = 2; // This won't be accessed with a sampler
- constexpr auto format = spv::ImageFormat::Unknown;
- const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
- const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
- const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
- AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
-
- Decorate(id, spv::Decoration::Binding, binding++);
- Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
- if (image.is_read && !image.is_written) {
- Decorate(id, spv::Decoration::NonWritable);
- } else if (image.is_written && !image.is_read) {
- Decorate(id, spv::Decoration::NonReadable);
+ if (image.type != Tegra::Shader::ImageType::TextureBuffer) {
+ continue;
}
+ DeclareImage(image, binding);
+ }
+ return binding;
+ }
- images.emplace(image.index, StorageImage{image_type, id});
+ u32 DeclareImages(u32 binding) {
+ for (const auto& image : ir.GetImages()) {
+ if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
+ continue;
+ }
+ DeclareImage(image, binding);
}
return binding;
}
+ void DeclareImage(const Image& image, u32& binding) {
+ const auto [dim, arrayed] = GetImageDim(image);
+ constexpr int depth = 0;
+ constexpr bool ms = false;
+ constexpr int sampled = 2; // This won't be accessed with a sampler
+ const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown;
+ const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
+ const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
+ const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
+ AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
+
+ Decorate(id, spv::Decoration::Binding, binding++);
+ Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
+ if (image.is_read && !image.is_written) {
+ Decorate(id, spv::Decoration::NonWritable);
+ } else if (image.is_written && !image.is_read) {
+ Decorate(id, spv::Decoration::NonReadable);
+ }
+
+ images.emplace(image.index, StorageImage{image_type, id});
+ }
+
bool IsRenderTargetEnabled(u32 rt) const {
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
@@ -1256,7 +1280,7 @@ private:
} else {
UNREACHABLE_MSG("Unmanaged offset node type");
}
- pointer = OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), buffer_index,
+ pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index,
buffer_element);
}
return {OpLoad(t_float, pointer), Type::Float};
@@ -1611,7 +1635,7 @@ private:
const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
const Id carry = OpCompositeExtract(t_uint, result, 1);
- return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool};
+ return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool};
}
Expression LogicalAssign(Operation operation) {
@@ -1674,7 +1698,7 @@ private:
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 index = meta.sampler.index;
if (meta.sampler.is_buffer) {
- const auto& entry = texel_buffers.at(index);
+ const auto& entry = uniform_texels.at(index);
return OpLoad(entry.image_type, entry.image);
} else {
const auto& entry = sampled_images.at(index);
@@ -1951,39 +1975,20 @@ private:
return {};
}
- Expression AtomicImageAdd(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Expression AtomicImageMin(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Expression AtomicImageMax(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Expression AtomicImageAnd(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Expression AtomicImageOr(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
+ template <Id (Module::*func)(Id, Id, Id, Id, Id)>
+ Expression AtomicImage(Operation operation) {
+ const auto& meta{std::get<MetaImage>(operation.GetMeta())};
+ ASSERT(meta.values.size() == 1);
- Expression AtomicImageXor(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
+ const Id coordinate = GetCoordinates(operation, Type::Int);
+ const Id image = images.at(meta.image.index).image;
+ const Id sample = v_uint_zero;
+ const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample);
- Expression AtomicImageExchange(Operation operation) {
- UNIMPLEMENTED();
- return {};
+ const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
+ const Id semantics = v_uint_zero;
+ const Id value = AsUint(Visit(meta.values[0]));
+ return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
}
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
@@ -1998,7 +2003,7 @@ private:
return {v_float_zero, Type::Float};
}
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
- const Id semantics = Constant(t_uint, 0);
+ const Id semantics = v_uint_zero;
const Id value = AsUint(Visit(operation[1]));
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
@@ -2622,11 +2627,11 @@ private:
&SPIRVDecompiler::ImageLoad,
&SPIRVDecompiler::ImageStore,
- &SPIRVDecompiler::AtomicImageAdd,
- &SPIRVDecompiler::AtomicImageAnd,
- &SPIRVDecompiler::AtomicImageOr,
- &SPIRVDecompiler::AtomicImageXor,
- &SPIRVDecompiler::AtomicImageExchange,
+ &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>,
+ &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>,
+ &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>,
+ &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>,
+ &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
@@ -2768,8 +2773,11 @@ private:
Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
+ const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint);
+
const Id v_float_zero = Constant(t_float, 0.0f);
const Id v_float_one = Constant(t_float, 1.0f);
+ const Id v_uint_zero = Constant(t_uint, 0);
// Nvidia uses these defaults for varyings (e.g. position and generic attributes)
const Id v_varying_default =
@@ -2794,15 +2802,15 @@ private:
std::unordered_map<u8, GenericVaryingDescription> output_attributes;
std::map<u32, Id> constant_buffers;
std::map<GlobalMemoryBase, Id> global_buffers;
- std::map<u32, TexelBuffer> texel_buffers;
+ std::map<u32, TexelBuffer> uniform_texels;
std::map<u32, SampledImage> sampled_images;
std::map<u32, StorageImage> images;
+ std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
Id instance_index{};
Id vertex_index{};
Id base_instance{};
Id base_vertex{};
- std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
Id frag_depth{};
Id frag_coord{};
Id front_facing{};
@@ -3058,13 +3066,17 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
}
for (const auto& sampler : ir.GetSamplers()) {
if (sampler.is_buffer) {
- entries.texel_buffers.emplace_back(sampler);
+ entries.uniform_texels.emplace_back(sampler);
} else {
entries.samplers.emplace_back(sampler);
}
}
for (const auto& image : ir.GetImages()) {
- entries.images.emplace_back(image);
+ if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
+ entries.storage_texels.emplace_back(image);
+ } else {
+ entries.images.emplace_back(image);
+ }
}
for (const auto& attribute : ir.GetInputAttributes()) {
if (IsGenericAttribute(attribute)) {
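
Splitting texel buffers out of the sampler and image lists matters because bindings are assigned sequentially per stage, and each category is assumed to map to a distinct Vulkan descriptor type: a buffer accessed through a sampler becomes a uniform texel buffer, while an image-typed buffer becomes a storage texel buffer. The assumed order and types:

    // Sketch: assumed binding order, matching the Declare* calls above.
    constexpr VkDescriptorType BINDING_ORDER[]{
        VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,         // const_buffers
        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,         // global_buffers
        VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,   // uniform_texels
        VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, // samplers
        VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,   // storage_texels
        VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,          // images
    };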
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index b7af26388..2b0e90396 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -21,8 +21,9 @@ class VKDevice;
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using TexelBufferEntry = VideoCommon::Shader::Sampler;
+using UniformTexelEntry = VideoCommon::Shader::Sampler;
using SamplerEntry = VideoCommon::Shader::Sampler;
+using StorageTexelEntry = VideoCommon::Shader::Image;
using ImageEntry = VideoCommon::Shader::Image;
constexpr u32 DESCRIPTOR_SET = 0;
@@ -66,13 +67,15 @@ private:
struct ShaderEntries {
u32 NumBindings() const {
return static_cast<u32>(const_buffers.size() + global_buffers.size() +
- texel_buffers.size() + samplers.size() + images.size());
+ uniform_texels.size() + samplers.size() + storage_texels.size() +
+ images.size());
}
std::vector<ConstBufferEntry> const_buffers;
std::vector<GlobalBufferEntry> global_buffers;
- std::vector<TexelBufferEntry> texel_buffers;
+ std::vector<UniformTexelEntry> uniform_texels;
std::vector<SamplerEntry> samplers;
+ std::vector<StorageTexelEntry> storage_texels;
std::vector<ImageEntry> images;
std::set<u32> attributes;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp
index 112df9c71..c1a218d76 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -19,13 +19,13 @@ vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, cons
const auto data = std::make_unique<u32[]>(code_size / sizeof(u32));
std::memcpy(data.get(), code_data, code_size);
- VkShaderModuleCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.codeSize = code_size;
- ci.pCode = data.get();
- return device.GetLogical().CreateShaderModule(ci);
+ return device.GetLogical().CreateShaderModule({
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .codeSize = code_size,
+ .pCode = data.get(),
+ });
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 45c180221..2fd3b7f39 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -10,36 +10,18 @@
#include "common/bit_util.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
-VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence,
- u64 last_epoch)
- : buffer{std::move(buffer)}, watch{fence}, last_epoch{last_epoch} {}
+VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer_)
+ : buffer{std::move(buffer_)} {}
-VKStagingBufferPool::StagingBuffer::StagingBuffer(StagingBuffer&& rhs) noexcept {
- buffer = std::move(rhs.buffer);
- watch = std::move(rhs.watch);
- last_epoch = rhs.last_epoch;
-}
-
-VKStagingBufferPool::StagingBuffer::~StagingBuffer() = default;
-
-VKStagingBufferPool::StagingBuffer& VKStagingBufferPool::StagingBuffer::operator=(
- StagingBuffer&& rhs) noexcept {
- buffer = std::move(rhs.buffer);
- watch = std::move(rhs.watch);
- last_epoch = rhs.last_epoch;
- return *this;
-}
-
-VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
- VKScheduler& scheduler)
- : device{device}, memory_manager{memory_manager}, scheduler{scheduler} {}
+VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device_, VKMemoryManager& memory_manager_,
+ VKScheduler& scheduler_)
+ : device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_} {}
VKStagingBufferPool::~VKStagingBufferPool() = default;
@@ -51,7 +33,6 @@ VKBuffer& VKStagingBufferPool::GetUnusedBuffer(std::size_t size, bool host_visib
}
void VKStagingBufferPool::TickFrame() {
- ++epoch;
current_delete_level = (current_delete_level + 1) % NumLevels;
ReleaseCache(true);
@@ -59,11 +40,12 @@ void VKStagingBufferPool::TickFrame() {
}
VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) {
- for (auto& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) {
- if (entry.watch.TryWatch(scheduler.GetFence())) {
- entry.last_epoch = epoch;
- return &*entry.buffer;
+ for (StagingBuffer& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) {
+ if (!scheduler.IsFree(entry.tick)) {
+ continue;
}
+ entry.tick = scheduler.CurrentTick();
+ return &*entry.buffer;
}
return nullptr;
}
@@ -71,24 +53,25 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_
VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) {
const u32 log2 = Common::Log2Ceil64(size);
- VkBufferCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.size = 1ULL << log2;
- ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
- VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
- VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
- ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ci.queueFamilyIndexCount = 0;
- ci.pQueueFamilyIndices = nullptr;
-
auto buffer = std::make_unique<VKBuffer>();
- buffer->handle = device.GetLogical().CreateBuffer(ci);
+ buffer->handle = device.GetLogical().CreateBuffer({
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = 1ULL << log2,
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
buffer->commit = memory_manager.Commit(buffer->handle, host_visible);
- auto& entries = GetCache(host_visible)[log2].entries;
- return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer;
+ std::vector<StagingBuffer>& entries = GetCache(host_visible)[log2].entries;
+ StagingBuffer& entry = entries.emplace_back(std::move(buffer));
+ entry.tick = scheduler.CurrentTick();
+ return *entry.buffer;
}
VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) {
@@ -110,9 +93,8 @@ u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t lo
auto& entries = staging.entries;
const std::size_t old_size = entries.size();
- const auto is_deleteable = [this](const auto& entry) {
- static constexpr u64 epochs_to_destroy = 180;
- return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed();
+ const auto is_deleteable = [this](const StagingBuffer& entry) {
+ return scheduler.IsFree(entry.tick);
};
const std::size_t begin_offset = staging.delete_index;
const std::size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size);
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 3c4901437..2dd5049ac 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -10,13 +10,11 @@
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
class VKDevice;
-class VKFenceWatch;
class VKScheduler;
struct VKBuffer final {
@@ -36,16 +34,10 @@ public:
private:
struct StagingBuffer final {
- explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence, u64 last_epoch);
- StagingBuffer(StagingBuffer&& rhs) noexcept;
- StagingBuffer(const StagingBuffer&) = delete;
- ~StagingBuffer();
-
- StagingBuffer& operator=(StagingBuffer&& rhs) noexcept;
+ explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer);
std::unique_ptr<VKBuffer> buffer;
- VKFenceWatch watch;
- u64 last_epoch = 0;
+ u64 tick = 0;
};
struct StagingBuffers final {
@@ -73,8 +65,6 @@ private:
StagingBuffersCache host_staging_buffers;
StagingBuffersCache device_staging_buffers;
- u64 epoch = 0;
-
std::size_t current_delete_level = 0;
};
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 94a89e388..5d2c4a796 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -36,6 +36,14 @@ Flags MakeInvalidationFlags() {
flags[BlendConstants] = true;
flags[DepthBounds] = true;
flags[StencilProperties] = true;
+ flags[CullMode] = true;
+ flags[DepthBoundsEnable] = true;
+ flags[DepthTestEnable] = true;
+ flags[DepthWriteEnable] = true;
+ flags[DepthCompareOp] = true;
+ flags[FrontFace] = true;
+ flags[StencilOp] = true;
+ flags[StencilTestEnable] = true;
return flags;
}
@@ -75,14 +83,58 @@ void SetupDirtyStencilProperties(Tables& tables) {
table[OFF(stencil_back_func_mask)] = StencilProperties;
}
-} // Anonymous namespace
+void SetupDirtyCullMode(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(cull_face)] = CullMode;
+ table[OFF(cull_test_enabled)] = CullMode;
+}
+
+void SetupDirtyDepthBoundsEnable(Tables& tables) {
+ tables[0][OFF(depth_bounds_enable)] = DepthBoundsEnable;
+}
+
+void SetupDirtyDepthTestEnable(Tables& tables) {
+ tables[0][OFF(depth_test_enable)] = DepthTestEnable;
+}
+
+void SetupDirtyDepthWriteEnable(Tables& tables) {
+ tables[0][OFF(depth_write_enabled)] = DepthWriteEnable;
+}
+
+void SetupDirtyDepthCompareOp(Tables& tables) {
+ tables[0][OFF(depth_test_func)] = DepthCompareOp;
+}
-StateTracker::StateTracker(Core::System& system)
- : system{system}, invalidation_flags{MakeInvalidationFlags()} {}
+void SetupDirtyFrontFace(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(front_face)] = FrontFace;
+ table[OFF(screen_y_control)] = FrontFace;
+}
+
+void SetupDirtyStencilOp(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(stencil_front_op_fail)] = StencilOp;
+ table[OFF(stencil_front_op_zfail)] = StencilOp;
+ table[OFF(stencil_front_op_zpass)] = StencilOp;
+ table[OFF(stencil_front_func_func)] = StencilOp;
+ table[OFF(stencil_back_op_fail)] = StencilOp;
+ table[OFF(stencil_back_op_zfail)] = StencilOp;
+ table[OFF(stencil_back_op_zpass)] = StencilOp;
+ table[OFF(stencil_back_func_func)] = StencilOp;
+
+ // Table 0 is used by StencilProperties
+ tables[1][OFF(stencil_two_side_enable)] = StencilOp;
+}
-void StateTracker::Initialize() {
- auto& dirty = system.GPU().Maxwell3D().dirty;
- auto& tables = dirty.tables;
+void SetupDirtyStencilTestEnable(Tables& tables) {
+ tables[0][OFF(stencil_enable)] = StencilTestEnable;
+}
+
+} // Anonymous namespace
+
+StateTracker::StateTracker(Tegra::GPU& gpu)
+ : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
+ auto& tables = gpu.Maxwell3D().dirty.tables;
SetupDirtyRenderTargets(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
@@ -90,10 +142,14 @@ void StateTracker::Initialize() {
SetupDirtyBlendConstants(tables);
SetupDirtyDepthBounds(tables);
SetupDirtyStencilProperties(tables);
-}
-
-void StateTracker::InvalidateCommandBufferState() {
- system.GPU().Maxwell3D().dirty.flags |= invalidation_flags;
+ SetupDirtyCullMode(tables);
+ SetupDirtyDepthBoundsEnable(tables);
+ SetupDirtyDepthTestEnable(tables);
+ SetupDirtyDepthWriteEnable(tables);
+ SetupDirtyDepthCompareOp(tables);
+ SetupDirtyFrontFace(tables);
+ SetupDirtyStencilOp(tables);
+ SetupDirtyStencilTestEnable(tables);
}
} // namespace Vulkan
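
Each SetupDirty* function writes a dirty-flag index into the register dispatch tables, so a write to the corresponding Maxwell3D register marks the flag; the rasterizer then consumes flags with the Touch* accessors before recording dynamic state. A hypothetical consumer, assuming VK_EXT_extended_dynamic_state and the usual MaxwellToVK translation helpers:

    // Sketch of a call site; not part of this patch.
    void UpdateCullMode(vk::CommandBuffer cmdbuf, const Tegra::Engines::Maxwell3D::Regs& regs,
                        StateTracker& state_tracker) {
        if (!state_tracker.TouchCullMode()) {
            return; // cull registers untouched since the last invalidation
        }
        cmdbuf.SetCullModeEXT(regs.cull_test_enabled ? MaxwellToVK::CullFace(regs.cull_face)
                                                     : VK_CULL_MODE_NONE);
    }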
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 03bc415b2..1de789e57 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -26,6 +26,15 @@ enum : u8 {
DepthBounds,
StencilProperties,
+ CullMode,
+ DepthBoundsEnable,
+ DepthTestEnable,
+ DepthWriteEnable,
+ DepthCompareOp,
+ FrontFace,
+ StencilOp,
+ StencilTestEnable,
+
Last
};
static_assert(Last <= std::numeric_limits<u8>::max());
@@ -33,12 +42,15 @@ static_assert(Last <= std::numeric_limits<u8>::max());
} // namespace Dirty
class StateTracker {
-public:
- explicit StateTracker(Core::System& system);
+ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
- void Initialize();
+public:
+ explicit StateTracker(Tegra::GPU& gpu);
- void InvalidateCommandBufferState();
+ void InvalidateCommandBufferState() {
+ flags |= invalidation_flags;
+ current_topology = INVALID_TOPOLOGY;
+ }
bool TouchViewports() {
return Exchange(Dirty::Viewports, false);
@@ -64,16 +76,60 @@ public:
return Exchange(Dirty::StencilProperties, false);
}
+ bool TouchCullMode() {
+ return Exchange(Dirty::CullMode, false);
+ }
+
+ bool TouchDepthBoundsTestEnable() {
+ return Exchange(Dirty::DepthBoundsEnable, false);
+ }
+
+ bool TouchDepthTestEnable() {
+ return Exchange(Dirty::DepthTestEnable, false);
+ }
+
+ bool TouchDepthBoundsEnable() {
+ return Exchange(Dirty::DepthBoundsEnable, false);
+ }
+
+ bool TouchDepthWriteEnable() {
+ return Exchange(Dirty::DepthWriteEnable, false);
+ }
+
+ bool TouchDepthCompareOp() {
+ return Exchange(Dirty::DepthCompareOp, false);
+ }
+
+ bool TouchFrontFace() {
+ return Exchange(Dirty::FrontFace, false);
+ }
+
+ bool TouchStencilOp() {
+ return Exchange(Dirty::StencilOp, false);
+ }
+
+ bool TouchStencilTestEnable() {
+ return Exchange(Dirty::StencilTestEnable, false);
+ }
+
+ bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) {
+ const bool has_changed = current_topology != new_topology;
+ current_topology = new_topology;
+ return has_changed;
+ }
+
private:
+ static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
+
bool Exchange(std::size_t id, bool new_value) const noexcept {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
const bool is_dirty = flags[id];
flags[id] = new_value;
return is_dirty;
}
- Core::System& system;
+ Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
+ Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
};
} // namespace Vulkan
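
ChangePrimitiveTopology lets the rasterizer treat the primitive topology as dynamic state while skipping redundant rebinds; INVALID_TOPOLOGY forces the first draw after a command buffer invalidation to set it again. Sketch of the intended usage (hypothetical call site and helper names):

    // Rebind only when the topology actually changed since the last draw.
    if (state_tracker.ChangePrimitiveTopology(regs.draw.topology)) {
        cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, regs.draw.topology));
    }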
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 868447af2..1b59612b9 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -11,7 +11,6 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/wrapper.h"
@@ -57,9 +56,9 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
} // Anonymous namespace
-VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_,
VkBufferUsageFlags usage)
- : device{device}, scheduler{scheduler} {
+ : device{device_}, scheduler{scheduler_} {
CreateBuffers(usage);
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
@@ -111,7 +110,7 @@ void VKStreamBuffer::Unmap(u64 size) {
}
auto& watch = current_watches[current_watch_cursor++];
watch.upper_bound = offset;
- watch.fence.Watch(scheduler.GetFence());
+ watch.tick = scheduler.CurrentTick();
}
void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
@@ -121,31 +120,29 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
    // Subtract from the preferred heap size some bytes to avoid running out of memory.
const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
- const VkDeviceSize allocable_size = heap_size - 4 * 1024 * 1024;
-
- VkBufferCreateInfo buffer_ci;
- buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
- buffer_ci.pNext = nullptr;
- buffer_ci.flags = 0;
- buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size);
- buffer_ci.usage = usage;
- buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- buffer_ci.queueFamilyIndexCount = 0;
- buffer_ci.pQueueFamilyIndices = nullptr;
-
- buffer = device.GetLogical().CreateBuffer(buffer_ci);
+    // As per DXVK's example, use half the heap size to avoid exhausting device memory.
+ const VkDeviceSize allocable_size = heap_size / 2;
+ buffer = device.GetLogical().CreateBuffer({
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size),
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer);
const u32 required_flags = requirements.memoryTypeBits;
stream_buffer_size = static_cast<u64>(requirements.size);
- VkMemoryAllocateInfo memory_ai;
- memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
- memory_ai.pNext = nullptr;
- memory_ai.allocationSize = requirements.size;
- memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags);
-
- memory = device.GetLogical().AllocateMemory(memory_ai);
+ memory = device.GetLogical().AllocateMemory({
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .allocationSize = requirements.size,
+ .memoryTypeIndex = GetMemoryType(memory_properties, required_flags),
+ });
buffer.BindMemory(*memory, 0);
}
@@ -160,7 +157,7 @@ void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
- watch.fence.Wait();
+ scheduler.Wait(watch.tick);
++wait_cursor;
}
}
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index dfddf7ad6..5e15ad78f 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -14,7 +14,6 @@
namespace Vulkan {
class VKDevice;
-class VKFence;
class VKFenceWatch;
class VKScheduler;
@@ -35,13 +34,17 @@ public:
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Unmap(u64 size);
- VkBuffer GetHandle() const {
+ VkBuffer Handle() const noexcept {
return *buffer;
}
+ u64 Address() const noexcept {
+ return 0;
+ }
+
private:
- struct Watch final {
- VKFenceWatch fence;
+ struct Watch {
+ u64 tick{};
u64 upper_bound{};
};
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index bffd8f32a..9636a7c65 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -12,7 +12,7 @@
#include "core/core.h"
#include "core/frontend/framebuffer_layout.h"
#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/wrapper.h"
@@ -56,8 +56,8 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
} // Anonymous namespace
-VKSwapchain::VKSwapchain(VkSurfaceKHR surface, const VKDevice& device)
- : surface{surface}, device{device} {}
+VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const VKDevice& device_, VKScheduler& scheduler_)
+ : surface{surface_}, device{device_}, scheduler{scheduler_} {}
VKSwapchain::~VKSwapchain() = default;
@@ -75,35 +75,33 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
CreateSemaphores();
CreateImageViews();
- fences.resize(image_count, nullptr);
+ resource_ticks.clear();
+ resource_ticks.resize(image_count);
}
void VKSwapchain::AcquireNextImage() {
device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(),
*present_semaphores[frame_index], {}, &image_index);
- if (auto& fence = fences[image_index]; fence) {
- fence->Wait();
- fence->Release();
- fence = nullptr;
- }
+ scheduler.Wait(resource_ticks[image_index]);
}
-bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) {
+bool VKSwapchain::Present(VkSemaphore render_semaphore) {
const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
const auto present_queue{device.GetPresentQueue()};
bool recreated = false;
- VkPresentInfoKHR present_info;
- present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
- present_info.pNext = nullptr;
- present_info.waitSemaphoreCount = render_semaphore ? 2U : 1U;
- present_info.pWaitSemaphores = semaphores.data();
- present_info.swapchainCount = 1;
- present_info.pSwapchains = swapchain.address();
- present_info.pImageIndices = &image_index;
- present_info.pResults = nullptr;
+ const VkPresentInfoKHR present_info{
+ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+ .pNext = nullptr,
+ .waitSemaphoreCount = render_semaphore ? 2U : 1U,
+ .pWaitSemaphores = semaphores.data(),
+ .swapchainCount = 1,
+ .pSwapchains = swapchain.address(),
+ .pImageIndices = &image_index,
+ .pResults = nullptr,
+ };
switch (const VkResult result = present_queue.Present(present_info)) {
case VK_SUCCESS:
@@ -122,8 +120,7 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) {
break;
}
- ASSERT(fences[image_index] == nullptr);
- fences[image_index] = &fence;
+ resource_ticks[image_index] = scheduler.CurrentTick();
frame_index = (frame_index + 1) % static_cast<u32>(image_count);
return recreated;
}
@@ -147,24 +144,26 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
requested_image_count = capabilities.maxImageCount;
}
- VkSwapchainCreateInfoKHR swapchain_ci;
- swapchain_ci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
- swapchain_ci.pNext = nullptr;
- swapchain_ci.flags = 0;
- swapchain_ci.surface = surface;
- swapchain_ci.minImageCount = requested_image_count;
- swapchain_ci.imageFormat = surface_format.format;
- swapchain_ci.imageColorSpace = surface_format.colorSpace;
- swapchain_ci.imageArrayLayers = 1;
- swapchain_ci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
- swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
- swapchain_ci.queueFamilyIndexCount = 0;
- swapchain_ci.pQueueFamilyIndices = nullptr;
- swapchain_ci.preTransform = capabilities.currentTransform;
- swapchain_ci.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
- swapchain_ci.presentMode = present_mode;
- swapchain_ci.clipped = VK_FALSE;
- swapchain_ci.oldSwapchain = nullptr;
+ VkSwapchainCreateInfoKHR swapchain_ci{
+ .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
+ .pNext = nullptr,
+ .flags = 0,
+ .surface = surface,
+ .minImageCount = requested_image_count,
+ .imageFormat = surface_format.format,
+ .imageColorSpace = surface_format.colorSpace,
+ .imageExtent = {},
+ .imageArrayLayers = 1,
+ .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ .preTransform = capabilities.currentTransform,
+ .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
+ .presentMode = present_mode,
+ .clipped = VK_FALSE,
+ .oldSwapchain = nullptr,
+ };
const u32 graphics_family{device.GetGraphicsFamily()};
const u32 present_family{device.GetPresentFamily()};
@@ -173,8 +172,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
swapchain_ci.pQueueFamilyIndices = queue_indices.data();
- } else {
- swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
}
// Request the size again to reduce the possibility of a TOCTOU race condition.
@@ -200,20 +197,29 @@ void VKSwapchain::CreateSemaphores() {
}
void VKSwapchain::CreateImageViews() {
- VkImageViewCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- // ci.image
- ci.viewType = VK_IMAGE_VIEW_TYPE_2D;
- ci.format = image_format;
- ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
- VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
- ci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
- ci.subresourceRange.baseMipLevel = 0;
- ci.subresourceRange.levelCount = 1;
- ci.subresourceRange.baseArrayLayer = 0;
- ci.subresourceRange.layerCount = 1;
+ VkImageViewCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .image = {},
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = image_format,
+ .components =
+ {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ },
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ };
image_views.resize(image_count);
for (std::size_t i = 0; i < image_count; i++) {
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index a35d61345..6b39befdf 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -16,11 +16,11 @@ struct FramebufferLayout;
namespace Vulkan {
class VKDevice;
-class VKFence;
+class VKScheduler;
class VKSwapchain {
public:
- explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device);
+ explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device, VKScheduler& scheduler);
~VKSwapchain();
/// Creates (or recreates) the swapchain with a given size.
@@ -31,7 +31,7 @@ public:
    /// Presents the rendered image to the swapchain. Returns true when the swapchain had to be
    /// recreated.
- bool Present(VkSemaphore render_semaphore, VKFence& fence);
+ bool Present(VkSemaphore render_semaphore);
/// Returns true when the framebuffer layout has changed.
bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const;
@@ -74,6 +74,7 @@ private:
const VkSurfaceKHR surface;
const VKDevice& device;
+ VKScheduler& scheduler;
vk::SwapchainKHR swapchain;
@@ -81,7 +82,7 @@ private:
std::vector<VkImage> images;
std::vector<vk::ImageView> image_views;
std::vector<vk::Framebuffer> framebuffers;
- std::vector<VKFence*> fences;
+ std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> present_semaphores;
u32 image_index{};
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 2f1d5021d..f2c8f2ae1 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -95,17 +95,18 @@ VkImageViewType GetImageViewType(SurfaceTarget target) {
vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
std::size_t host_memory_size) {
// TODO(Rodrigo): Move texture buffer creation to the buffer cache
- VkBufferCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.size = static_cast<VkDeviceSize>(host_memory_size);
- ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
- VK_BUFFER_USAGE_TRANSFER_DST_BIT;
- ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ci.queueFamilyIndexCount = 0;
- ci.pQueueFamilyIndices = nullptr;
- return device.GetLogical().CreateBuffer(ci);
+ return device.GetLogical().CreateBuffer({
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = static_cast<VkDeviceSize>(host_memory_size),
+ .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
+ VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+ VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
}
VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
@@ -113,15 +114,16 @@ VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
std::size_t host_memory_size) {
ASSERT(params.IsBuffer());
- VkBufferViewCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.buffer = buffer;
- ci.format = MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format;
- ci.offset = 0;
- ci.range = static_cast<VkDeviceSize>(host_memory_size);
- return ci;
+ return {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .buffer = buffer,
+ .format =
+ MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format,
+ .offset = 0,
+ .range = static_cast<VkDeviceSize>(host_memory_size),
+ };
}
VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) {
@@ -130,23 +132,24 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
const auto [format, attachable, storage] =
MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format);
- VkImageCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.imageType = SurfaceTargetToImage(params.target);
- ci.format = format;
- ci.mipLevels = params.num_levels;
- ci.arrayLayers = static_cast<u32>(params.GetNumLayers());
- ci.samples = VK_SAMPLE_COUNT_1_BIT;
- ci.tiling = VK_IMAGE_TILING_OPTIMAL;
- ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ci.queueFamilyIndexCount = 0;
- ci.pQueueFamilyIndices = nullptr;
- ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
-
- ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
- VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+ VkImageCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .imageType = SurfaceTargetToImage(params.target),
+ .format = format,
+ .extent = {},
+ .mipLevels = params.num_levels,
+ .arrayLayers = static_cast<u32>(params.GetNumLayers()),
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ };
if (attachable) {
ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT
: VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
@@ -167,6 +170,7 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
ci.extent = {params.width, params.height, 1};
break;
case SurfaceTarget::Texture3D:
+ ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
ci.extent = {params.width, params.height, params.depth};
break;
case SurfaceTarget::TextureBuffer:
@@ -176,14 +180,18 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
return ci;
}
+u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source,
+ Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) {
+ return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
+ (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
+}
+
} // Anonymous namespace
-CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+CachedSurface::CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
GPUVAddr gpu_addr, const SurfaceParams& params)
- : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system},
- device{device}, resource_manager{resource_manager},
+ : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, device{device},
memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {
if (params.IsBuffer()) {
buffer = CreateBuffer(device, params, host_memory_size);
@@ -203,9 +211,11 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
}
// TODO(Rodrigo): Move this to a virtual function.
- main_view = CreateViewInner(
- ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels),
- true);
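+    // Layered and 3D targets expose every slice through the main view, so use the
+    // surface depth as the layer count here.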
+ u32 num_layers = 1;
+ if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
+ num_layers = params.depth;
+ }
+ main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels));
}
CachedSurface::~CachedSurface() = default;
@@ -224,7 +234,7 @@ void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
UNIMPLEMENTED_IF(params.IsBuffer());
- if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
+ if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) {
LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed");
}
@@ -253,12 +263,8 @@ void CachedSurface::DecorateSurfaceName() {
}
View CachedSurface::CreateView(const ViewParams& params) {
- return CreateViewInner(params, false);
-}
-
-View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) {
// TODO(Rodrigo): Add name decorations
- return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy);
+ return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params);
}
void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
@@ -276,12 +282,10 @@ void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barrier.pNext = nullptr;
- barrier.srcAccessMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
- barrier.dstAccessMask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
- barrier.srcQueueFamilyIndex = VK_ACCESS_TRANSFER_WRITE_BIT;
- barrier.dstQueueFamilyIndex = VK_ACCESS_SHADER_READ_BIT;
- barrier.srcQueueFamilyIndex = 0;
- barrier.dstQueueFamilyIndex = 0;
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // No queue family ownership transfer
+    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.buffer = dst_buffer;
barrier.offset = 0;
barrier.size = size;
@@ -318,22 +322,25 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
}
VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
- VkBufferImageCopy copy;
- copy.bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted);
- copy.bufferRowLength = 0;
- copy.bufferImageHeight = 0;
- copy.imageSubresource.aspectMask = image->GetAspectMask();
- copy.imageSubresource.mipLevel = level;
- copy.imageSubresource.baseArrayLayer = 0;
- copy.imageSubresource.layerCount = static_cast<u32>(params.GetNumLayers());
- copy.imageOffset.x = 0;
- copy.imageOffset.y = 0;
- copy.imageOffset.z = 0;
- copy.imageExtent.width = params.GetMipWidth(level);
- copy.imageExtent.height = params.GetMipHeight(level);
- copy.imageExtent.depth =
- params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1;
- return copy;
+ return {
+ .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted),
+ .bufferRowLength = 0,
+ .bufferImageHeight = 0,
+ .imageSubresource =
+ {
+ .aspectMask = image->GetAspectMask(),
+ .mipLevel = level,
+ .baseArrayLayer = 0,
+ .layerCount = static_cast<u32>(params.GetNumLayers()),
+ },
+ .imageOffset = {.x = 0, .y = 0, .z = 0},
+ .imageExtent =
+ {
+ .width = params.GetMipWidth(level),
+ .height = params.GetMipHeight(level),
+ .depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1U,
+ },
+ };
}
VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
@@ -342,18 +349,27 @@ VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
}
CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
- const ViewParams& params, bool is_proxy)
+ const ViewParams& params)
: VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
- base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level},
- num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target)
- : VK_IMAGE_VIEW_TYPE_1D} {}
+ base_level{params.base_level}, num_levels{params.num_levels},
+ image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} {
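+    // 3D views address the image by depth slices rather than array layers; track
+    // the slice range separately so attachments can remap it to 2D array layers.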
+ if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
+ base_layer = 0;
+ num_layers = 1;
+ base_slice = params.base_layer;
+ num_slices = params.num_layers;
+ } else {
+ base_layer = params.base_layer;
+ num_layers = params.num_layers;
+ }
+}
CachedSurfaceView::~CachedSurfaceView() = default;
-VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
- SwizzleSource z_source, SwizzleSource w_source) {
+VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source,
+ SwizzleSource z_source, SwizzleSource w_source) {
const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
if (last_image_view && last_swizzle == new_swizzle) {
return last_image_view;
@@ -368,7 +384,7 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source),
MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)};
- if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
+ if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) {
// A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here.
std::swap(swizzle[0], swizzle[2]);
}
@@ -380,11 +396,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
const bool is_first = x_source == SwizzleSource::R;
switch (params.pixel_format) {
- case VideoCore::Surface::PixelFormat::Z24S8:
- case VideoCore::Surface::PixelFormat::Z32FS8:
+ case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT:
+ case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT:
aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
break;
- case VideoCore::Surface::PixelFormat::S8Z24:
+ case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM:
aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
break;
default:
@@ -399,37 +415,93 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
});
}
- VkImageViewCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.image = surface.GetImageHandle();
- ci.viewType = image_view_type;
- ci.format = surface.GetImage().GetFormat();
- ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]};
- ci.subresourceRange.aspectMask = aspect;
- ci.subresourceRange.baseMipLevel = base_level;
- ci.subresourceRange.levelCount = num_levels;
- ci.subresourceRange.baseArrayLayer = base_layer;
- ci.subresourceRange.layerCount = num_layers;
- image_view = device.GetLogical().CreateImageView(ci);
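+    // Sampled 3D views must cover the whole depth; slice subranges are only
+    // supported through GetAttachment.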
+ if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
+ ASSERT(base_slice == 0);
+ ASSERT(num_slices == params.depth);
+ }
+
+ image_view = device.GetLogical().CreateImageView({
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .image = surface.GetImageHandle(),
+ .viewType = image_view_type,
+ .format = surface.GetImage().GetFormat(),
+ .components =
+ {
+ .r = swizzle[0],
+ .g = swizzle[1],
+ .b = swizzle[2],
+ .a = swizzle[3],
+ },
+ .subresourceRange =
+ {
+ .aspectMask = aspect,
+ .baseMipLevel = base_level,
+ .levelCount = num_levels,
+ .baseArrayLayer = base_layer,
+ .layerCount = num_layers,
+ },
+ });
return last_image_view = *image_view;
}
-VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKResourceManager& resource_manager,
- VKMemoryManager& memory_manager, VKScheduler& scheduler,
- VKStagingBufferPool& staging_pool)
- : TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device},
- resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
- staging_pool{staging_pool} {}
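+// Returns a view suitable for use as a framebuffer attachment. Unlike GetImageView,
+// swizzles are identity and 3D slices are exposed as 2D array layers.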
+VkImageView CachedSurfaceView::GetAttachment() {
+ if (render_target) {
+ return *render_target;
+ }
+
+ VkImageViewCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .image = surface.GetImageHandle(),
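+        // viewType and the layer range below are placeholders; both branches after
+        // the initializer overwrite them once the target type is known.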
+ .viewType = VK_IMAGE_VIEW_TYPE_1D,
+ .format = surface.GetImage().GetFormat(),
+ .components =
+ {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ },
+ .subresourceRange =
+ {
+ .aspectMask = aspect_mask,
+ .baseMipLevel = base_level,
+ .levelCount = num_levels,
+ .baseArrayLayer = 0,
+ .layerCount = 0,
+ },
+ };
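+    // 3D images cannot be rendered to directly; reinterpret the slice range as 2D
+    // array layers (legal because the image was created 2D-array compatible).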
+ if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
+ ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
+ ci.subresourceRange.baseArrayLayer = base_slice;
+ ci.subresourceRange.layerCount = num_slices;
+ } else {
+ ci.viewType = image_view_type;
+ ci.subresourceRange.baseArrayLayer = base_layer;
+ ci.subresourceRange.layerCount = num_layers;
+ }
+ render_target = device.GetLogical().CreateImageView(ci);
+ return *render_target;
+}
+
+VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::MemoryManager& gpu_memory, const VKDevice& device_,
+ VKMemoryManager& memory_manager_, VKScheduler& scheduler_,
+ VKStagingBufferPool& staging_pool_)
+    : TextureCache(rasterizer, maxwell3d, gpu_memory, device_.IsOptimalAstcSupported()),
+      device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_},
+      staging_pool{staging_pool_} {}
VKTextureCache::~VKTextureCache() = default;
Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
- return std::make_shared<CachedSurface>(system, device, resource_manager, memory_manager,
- scheduler, staging_pool, gpu_addr, params);
+ return std::make_shared<CachedSurface>(device, memory_manager, scheduler, staging_pool,
+ gpu_addr, params);
}
void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
@@ -456,24 +528,40 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
- VkImageCopy copy;
- copy.srcSubresource.aspectMask = src_surface->GetAspectMask();
- copy.srcSubresource.mipLevel = copy_params.source_level;
- copy.srcSubresource.baseArrayLayer = copy_params.source_z;
- copy.srcSubresource.layerCount = num_layers;
- copy.srcOffset.x = copy_params.source_x;
- copy.srcOffset.y = copy_params.source_y;
- copy.srcOffset.z = 0;
- copy.dstSubresource.aspectMask = dst_surface->GetAspectMask();
- copy.dstSubresource.mipLevel = copy_params.dest_level;
- copy.dstSubresource.baseArrayLayer = dst_base_layer;
- copy.dstSubresource.layerCount = num_layers;
- copy.dstOffset.x = copy_params.dest_x;
- copy.dstOffset.y = copy_params.dest_y;
- copy.dstOffset.z = dst_offset_z;
- copy.extent.width = copy_params.width;
- copy.extent.height = copy_params.height;
- copy.extent.depth = extent_z;
+ const VkImageCopy copy{
+ .srcSubresource =
+ {
+ .aspectMask = src_surface->GetAspectMask(),
+ .mipLevel = copy_params.source_level,
+ .baseArrayLayer = copy_params.source_z,
+ .layerCount = num_layers,
+ },
+ .srcOffset =
+ {
+ .x = static_cast<s32>(copy_params.source_x),
+ .y = static_cast<s32>(copy_params.source_y),
+ .z = 0,
+ },
+ .dstSubresource =
+ {
+ .aspectMask = dst_surface->GetAspectMask(),
+ .mipLevel = copy_params.dest_level,
+ .baseArrayLayer = dst_base_layer,
+ .layerCount = num_layers,
+ },
+ .dstOffset =
+ {
+ .x = static_cast<s32>(copy_params.dest_x),
+ .y = static_cast<s32>(copy_params.dest_y),
+ .z = static_cast<s32>(dst_offset_z),
+ },
+ .extent =
+ {
+ .width = copy_params.width,
+ .height = copy_params.height,
+ .depth = extent_z,
+ },
+ };
const VkImage src_image = src_surface->GetImageHandle();
const VkImage dst_image = dst_surface->GetImageHandle();
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index f211ccb1e..39202feba 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -15,10 +15,6 @@
#include "video_core/texture_cache/surface_base.h"
#include "video_core/texture_cache/texture_cache.h"
-namespace Core {
-class System;
-}
-
namespace VideoCore {
class RasterizerInterface;
}
@@ -27,7 +23,6 @@ namespace Vulkan {
class RasterizerVulkan;
class VKDevice;
-class VKResourceManager;
class VKScheduler;
class VKStagingBufferPool;
@@ -45,8 +40,7 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> {
friend CachedSurfaceView;
public:
- explicit CachedSurface(Core::System& system, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+ explicit CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
GPUVAddr gpu_addr, const SurfaceParams& params);
~CachedSurface();
@@ -91,7 +85,6 @@ protected:
void DecorateSurfaceName();
View CreateView(const ViewParams& params) override;
- View CreateViewInner(const ViewParams& params, bool is_proxy);
private:
void UploadBuffer(const std::vector<u8>& staging_buffer);
@@ -102,9 +95,7 @@ private:
VkImageSubresourceRange GetImageSubresourceRange() const;
- Core::System& system;
const VKDevice& device;
- VKResourceManager& resource_manager;
VKMemoryManager& memory_manager;
VKScheduler& scheduler;
VKStagingBufferPool& staging_pool;
@@ -120,23 +111,20 @@ private:
class CachedSurfaceView final : public VideoCommon::ViewBase {
public:
explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
- const ViewParams& params, bool is_proxy);
+ const ViewParams& params);
~CachedSurfaceView();
- VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source);
+ VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source,
+ Tegra::Texture::SwizzleSource y_source,
+ Tegra::Texture::SwizzleSource z_source,
+ Tegra::Texture::SwizzleSource w_source);
+
+ VkImageView GetAttachment();
bool IsSameSurface(const CachedSurfaceView& rhs) const {
return &surface == &rhs.surface;
}
- VkImageView GetHandle() {
- return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
- Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
- }
-
u32 GetWidth() const {
return params.GetMipWidth(base_level);
}
@@ -180,14 +168,6 @@ public:
}
private:
- static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source) {
- return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
- (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
- }
-
// Store a copy of these values to avoid double dereference when reading them
const SurfaceParams params;
const VkImage image;
@@ -196,24 +176,27 @@ private:
const VKDevice& device;
CachedSurface& surface;
- const u32 base_layer;
- const u32 num_layers;
const u32 base_level;
const u32 num_levels;
const VkImageViewType image_view_type;
+ u32 base_layer = 0;
+ u32 num_layers = 0;
+ u32 base_slice = 0;
+ u32 num_slices = 0;
VkImageView last_image_view = nullptr;
u32 last_swizzle = 0;
+ vk::ImageView render_target;
std::unordered_map<u32, vk::ImageView> view_cache;
};
class VKTextureCache final : public TextureCacheBase {
public:
- explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKResourceManager& resource_manager,
- VKMemoryManager& memory_manager, VKScheduler& scheduler,
- VKStagingBufferPool& staging_pool);
+ explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
+ const VKDevice& device, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKTextureCache();
private:
@@ -228,7 +211,6 @@ private:
void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
const VKDevice& device;
- VKResourceManager& resource_manager;
VKMemoryManager& memory_manager;
VKScheduler& scheduler;
VKStagingBufferPool& staging_pool;
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 681ecde98..351c048d2 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -24,35 +24,25 @@ void VKUpdateDescriptorQueue::TickFrame() {
}
void VKUpdateDescriptorQueue::Acquire() {
- entries.clear();
-}
+    // Minimum number of free entries required in the payload.
+    // This is the maximum number of entries a single draw call might use.
+ static constexpr std::size_t MIN_ENTRIES = 0x400;
-void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
- VkDescriptorSet set) {
- if (payload.size() + entries.size() >= payload.max_size()) {
+ if (payload.size() + MIN_ENTRIES >= payload.max_size()) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
payload.clear();
}
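+    // Remember where this invocation's descriptor data begins; Send() hands this
+    // pointer to the deferred descriptor template update.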
+    upload_start = payload.data() + payload.size();
+}
- // TODO(Rodrigo): Rework to write the payload directly
- const auto payload_start = payload.data() + payload.size();
- for (const auto& entry : entries) {
- if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) {
- payload.push_back(*image);
- } else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) {
- payload.push_back(*buffer);
- } else if (const auto texel = std::get_if<VkBufferView>(&entry)) {
- payload.push_back(*texel);
- } else {
- UNREACHABLE();
- }
- }
-
- scheduler.Record(
- [payload_start, set, update_template, logical = &device.GetLogical()](vk::CommandBuffer) {
- logical->UpdateDescriptorSet(set, update_template, payload_start);
- });
+void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
+ VkDescriptorSet set) {
+ const void* const data = upload_start;
+ const vk::Device* const logical = &device.GetLogical();
+ scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
+ logical->UpdateDescriptorSet(set, update_template, data);
+ });
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index cc7e3dff4..945320c72 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -15,17 +15,13 @@ namespace Vulkan {
class VKDevice;
class VKScheduler;
-class DescriptorUpdateEntry {
-public:
- explicit DescriptorUpdateEntry() {}
-
- DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {}
+struct DescriptorUpdateEntry {
+ DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
- DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {}
+ DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
- DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {}
+ DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
-private:
union {
VkDescriptorImageInfo image;
VkDescriptorBufferInfo buffer;
@@ -45,32 +41,34 @@ public:
void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
void AddSampledImage(VkSampler sampler, VkImageView image_view) {
- entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
+ payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
}
void AddImage(VkImageView image_view) {
- entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
+ payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
}
void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) {
- entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
+ payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
}
void AddTexelBuffer(VkBufferView texel_buffer) {
- entries.emplace_back(texel_buffer);
+ payload.emplace_back(texel_buffer);
}
- VkImageLayout* GetLastImageLayout() {
- return &std::get<VkDescriptorImageInfo>(entries.back()).imageLayout;
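+    /// Returns a pointer to the layout of the most recently added image entry,
+    /// letting callers fill it in once the final image layout is known.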
+ VkImageLayout* LastImageLayout() {
+ return &payload.back().image.imageLayout;
}
-private:
- using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>;
+ const VkImageLayout* LastImageLayout() const {
+ return &payload.back().image.imageLayout;
+ }
+private:
const VKDevice& device;
VKScheduler& scheduler;
- boost::container::static_vector<Variant, 0x400> entries;
+ const DescriptorUpdateEntry* upload_start = nullptr;
boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
};
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 2ce9b0626..1fb14e190 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -88,6 +88,16 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdSetStencilWriteMask);
X(vkCmdSetViewport);
X(vkCmdWaitEvents);
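+    // VK_EXT_extended_dynamic_state entry points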
+ X(vkCmdBindVertexBuffers2EXT);
+ X(vkCmdSetCullModeEXT);
+ X(vkCmdSetDepthBoundsTestEnableEXT);
+ X(vkCmdSetDepthCompareOpEXT);
+ X(vkCmdSetDepthTestEnableEXT);
+ X(vkCmdSetDepthWriteEnableEXT);
+ X(vkCmdSetFrontFaceEXT);
+ X(vkCmdSetPrimitiveTopologyEXT);
+ X(vkCmdSetStencilOpEXT);
+ X(vkCmdSetStencilTestEnableEXT);
X(vkCreateBuffer);
X(vkCreateBufferView);
X(vkCreateCommandPool);
@@ -138,6 +148,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkGetFenceStatus);
X(vkGetImageMemoryRequirements);
X(vkGetQueryPoolResults);
+ X(vkGetSemaphoreCounterValueKHR);
X(vkMapMemory);
X(vkQueueSubmit);
X(vkResetFences);
@@ -146,6 +157,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkUpdateDescriptorSetWithTemplateKHR);
X(vkUpdateDescriptorSets);
X(vkWaitForFences);
+ X(vkWaitSemaphoresKHR);
#undef X
}
@@ -153,7 +165,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
bool Load(InstanceDispatch& dld) noexcept {
#define X(name) Proc(dld.name, dld, #name)
- return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties);
+ return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties) &&
+ X(vkEnumerateInstanceLayerProperties);
#undef X
}
@@ -251,6 +264,22 @@ const char* ToString(VkResult result) noexcept {
return "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT";
case VkResult::VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT:
return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT";
+ case VkResult::VK_ERROR_UNKNOWN:
+ return "VK_ERROR_UNKNOWN";
+ case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR:
+ return "VK_ERROR_INCOMPATIBLE_VERSION_KHR";
+ case VkResult::VK_THREAD_IDLE_KHR:
+ return "VK_THREAD_IDLE_KHR";
+ case VkResult::VK_THREAD_DONE_KHR:
+ return "VK_THREAD_DONE_KHR";
+ case VkResult::VK_OPERATION_DEFERRED_KHR:
+ return "VK_OPERATION_DEFERRED_KHR";
+ case VkResult::VK_OPERATION_NOT_DEFERRED_KHR:
+ return "VK_OPERATION_NOT_DEFERRED_KHR";
+ case VkResult::VK_PIPELINE_COMPILE_REQUIRED_EXT:
+ return "VK_PIPELINE_COMPILE_REQUIRED_EXT";
+ case VkResult::VK_RESULT_MAX_ENUM:
+ return "VK_RESULT_MAX_ENUM";
}
return "Unknown";
}
@@ -366,24 +395,26 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe
Instance Instance::Create(Span<const char*> layers, Span<const char*> extensions,
InstanceDispatch& dld) noexcept {
- VkApplicationInfo application_info;
- application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
- application_info.pNext = nullptr;
- application_info.pApplicationName = "yuzu Emulator";
- application_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0);
- application_info.pEngineName = "yuzu Emulator";
- application_info.engineVersion = VK_MAKE_VERSION(0, 1, 0);
- application_info.apiVersion = VK_API_VERSION_1_1;
-
- VkInstanceCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.pApplicationInfo = &application_info;
- ci.enabledLayerCount = layers.size();
- ci.ppEnabledLayerNames = layers.data();
- ci.enabledExtensionCount = extensions.size();
- ci.ppEnabledExtensionNames = extensions.data();
+ static constexpr VkApplicationInfo application_info{
+ .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+ .pNext = nullptr,
+ .pApplicationName = "yuzu Emulator",
+ .applicationVersion = VK_MAKE_VERSION(0, 1, 0),
+ .pEngineName = "yuzu Emulator",
+ .engineVersion = VK_MAKE_VERSION(0, 1, 0),
+ .apiVersion = VK_API_VERSION_1_1,
+ };
+
+ const VkInstanceCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .pApplicationInfo = &application_info,
+ .enabledLayerCount = layers.size(),
+ .ppEnabledLayerNames = layers.data(),
+ .enabledExtensionCount = extensions.size(),
+ .ppEnabledExtensionNames = extensions.data(),
+ };
VkInstance instance;
if (dld.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) {
@@ -414,19 +445,20 @@ std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices(
DebugCallback Instance::TryCreateDebugCallback(
PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept {
- VkDebugUtilsMessengerCreateInfoEXT ci;
- ci.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
- VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
- VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
- VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT;
- ci.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
- VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
- VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
- ci.pfnUserCallback = callback;
- ci.pUserData = nullptr;
+ const VkDebugUtilsMessengerCreateInfoEXT ci{
+ .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .flags = 0,
+ .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT,
+ .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
+ .pfnUserCallback = callback,
+ .pUserData = nullptr,
+ };
VkDebugUtilsMessengerEXT messenger;
if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) {
@@ -457,12 +489,13 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c
}
CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const {
- VkCommandBufferAllocateInfo ai;
- ai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
- ai.pNext = nullptr;
- ai.commandPool = handle;
- ai.level = level;
- ai.commandBufferCount = static_cast<u32>(num_buffers);
+ const VkCommandBufferAllocateInfo ai{
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .commandPool = handle,
+ .level = level,
+ .commandBufferCount = static_cast<u32>(num_buffers),
+ };
std::unique_ptr buffers = std::make_unique<VkCommandBuffer[]>(num_buffers);
switch (const VkResult result = dld->vkAllocateCommandBuffers(owner, &ai, buffers.get())) {
@@ -486,17 +519,18 @@ std::vector<VkImage> SwapchainKHR::GetImages() const {
Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
Span<const char*> enabled_extensions, const void* next,
DeviceDispatch& dld) noexcept {
- VkDeviceCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
- ci.pNext = next;
- ci.flags = 0;
- ci.queueCreateInfoCount = queues_ci.size();
- ci.pQueueCreateInfos = queues_ci.data();
- ci.enabledLayerCount = 0;
- ci.ppEnabledLayerNames = nullptr;
- ci.enabledExtensionCount = enabled_extensions.size();
- ci.ppEnabledExtensionNames = enabled_extensions.data();
- ci.pEnabledFeatures = nullptr;
+ const VkDeviceCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+ .pNext = next,
+ .flags = 0,
+ .queueCreateInfoCount = queues_ci.size(),
+ .pQueueCreateInfos = queues_ci.data(),
+ .enabledLayerCount = 0,
+ .ppEnabledLayerNames = nullptr,
+ .enabledExtensionCount = enabled_extensions.size(),
+ .ppEnabledExtensionNames = enabled_extensions.data(),
+ .pEnabledFeatures = nullptr,
+ };
VkDevice device;
if (dld.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) {
@@ -537,11 +571,15 @@ ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const {
}
Semaphore Device::CreateSemaphore() const {
- VkSemaphoreCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
+ static constexpr VkSemaphoreCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ };
+ return CreateSemaphore(ci);
+}
+Semaphore Device::CreateSemaphore(const VkSemaphoreCreateInfo& ci) const {
VkSemaphore object;
Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object));
return Semaphore(object, handle, *dld);
@@ -628,10 +666,12 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
}
Event Device::CreateEvent() const {
- VkEventCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
+ static constexpr VkEventCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ };
+
VkEvent object;
Check(dld->vkCreateEvent(handle, &ci, nullptr, &object));
return Event(object, handle, *dld);
@@ -725,8 +765,7 @@ bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR s
return supported == VK_TRUE;
}
-VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const
- noexcept {
+VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const {
VkSurfaceCapabilitiesKHR capabilities;
Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities));
return capabilities;
@@ -768,6 +807,19 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp
VK_SUCCESS) {
return std::nullopt;
}
+ return std::move(properties);
+}
+
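+/// Enumerates available instance layers; returns std::nullopt on failure.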
+std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
+ const InstanceDispatch& dld) {
+ u32 num;
+ if (dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) {
+ return std::nullopt;
+ }
+ std::vector<VkLayerProperties> properties(num);
+ if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) {
+ return std::nullopt;
+ }
return properties;
}
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 98937a77a..234e01693 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -141,6 +141,7 @@ struct InstanceDispatch {
PFN_vkCreateInstance vkCreateInstance;
PFN_vkDestroyInstance vkDestroyInstance;
PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties;
+ PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties;
PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT;
PFN_vkCreateDevice vkCreateDevice;
@@ -206,6 +207,16 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask;
PFN_vkCmdSetViewport vkCmdSetViewport;
PFN_vkCmdWaitEvents vkCmdWaitEvents;
+ PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT;
+ PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT;
+ PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT;
+ PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT;
+ PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT;
+ PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT;
+ PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT;
+ PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT;
+ PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT;
+ PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT;
PFN_vkCreateBuffer vkCreateBuffer;
PFN_vkCreateBufferView vkCreateBufferView;
PFN_vkCreateCommandPool vkCreateCommandPool;
@@ -256,6 +267,7 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkGetFenceStatus vkGetFenceStatus;
PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
+ PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR;
PFN_vkMapMemory vkMapMemory;
PFN_vkQueueSubmit vkQueueSubmit;
PFN_vkResetFences vkResetFences;
@@ -264,6 +276,7 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR;
PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets;
PFN_vkWaitForFences vkWaitForFences;
+ PFN_vkWaitSemaphoresKHR vkWaitSemaphoresKHR;
};
/// Loads instance agnostic function pointers.
@@ -539,7 +552,6 @@ using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>;
using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>;
using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>;
using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>;
-using Semaphore = Handle<VkSemaphore, VkDevice, DeviceDispatch>;
using ShaderModule = Handle<VkShaderModule, VkDevice, DeviceDispatch>;
using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>;
@@ -571,7 +583,8 @@ public:
/// Construct a queue handle.
constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {}
- VkResult Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const noexcept {
+ VkResult Submit(Span<VkSubmitInfo> submit_infos,
+ VkFence fence = VK_NULL_HANDLE) const noexcept {
return dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence);
}
@@ -663,6 +676,44 @@ public:
}
};
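+/// Semaphore handle with helpers for VK_KHR_timeline_semaphore operations.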
+class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> {
+ using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle;
+
+public:
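+    /// Returns the current counter value of the timeline semaphore.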
+ [[nodiscard]] u64 GetCounter() const {
+ u64 value;
+ Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value));
+ return value;
+ }
+
+ /**
+ * Waits for a timeline semaphore on the host.
+ *
+     * @param value Value to wait for
+     * @param timeout Timeout in nanoseconds
+     * @return True on a successful wait, false on timeout
+ */
+ bool Wait(u64 value, u64 timeout = std::numeric_limits<u64>::max()) const {
+ const VkSemaphoreWaitInfoKHR wait_info{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR,
+ .pNext = nullptr,
+ .flags = 0,
+ .semaphoreCount = 1,
+ .pSemaphores = &handle,
+ .pValues = &value,
+ };
+ const VkResult result = dld->vkWaitSemaphoresKHR(owner, &wait_info, timeout);
+ switch (result) {
+ case VK_SUCCESS:
+ return true;
+ case VK_TIMEOUT:
+ return false;
+ default:
+ throw Exception(result);
+ }
+ }
+};
+
class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle;
@@ -683,6 +734,8 @@ public:
Semaphore CreateSemaphore() const;
+ Semaphore CreateSemaphore(const VkSemaphoreCreateInfo& ci) const;
+
Fence CreateFence(const VkFenceCreateInfo& ci) const;
DescriptorPool CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const;
@@ -745,8 +798,8 @@ public:
}
VkResult GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size,
- void* data, VkDeviceSize stride, VkQueryResultFlags flags) const
- noexcept {
+ void* data, VkDeviceSize stride,
+ VkQueryResultFlags flags) const noexcept {
return dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride,
flags);
}
@@ -779,7 +832,7 @@ public:
bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const;
- VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept;
+ VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const;
std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const;
@@ -838,8 +891,8 @@ public:
dld->vkCmdBindPipeline(handle, bind_point, pipeline);
}
- void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType index_type) const
- noexcept {
+ void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset,
+ VkIndexType index_type) const noexcept {
dld->vkCmdBindIndexBuffer(handle, buffer, offset, index_type);
}
@@ -852,8 +905,8 @@ public:
BindVertexBuffers(binding, 1, &buffer, &offset);
}
- void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex, u32 first_instance) const
- noexcept {
+ void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex,
+ u32 first_instance) const noexcept {
dld->vkCmdDraw(handle, vertex_count, instance_count, first_vertex, first_instance);
}
@@ -863,15 +916,15 @@ public:
first_instance);
}
- void ClearAttachments(Span<VkClearAttachment> attachments, Span<VkClearRect> rects) const
- noexcept {
+ void ClearAttachments(Span<VkClearAttachment> attachments,
+ Span<VkClearRect> rects) const noexcept {
dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(),
rects.data());
}
void BlitImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image,
- VkImageLayout dst_layout, Span<VkImageBlit> regions, VkFilter filter) const
- noexcept {
+ VkImageLayout dst_layout, Span<VkImageBlit> regions,
+ VkFilter filter) const noexcept {
dld->vkCmdBlitImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(),
regions.data(), filter);
}
@@ -896,8 +949,8 @@ public:
regions.data());
}
- void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, Span<VkBufferCopy> regions) const
- noexcept {
+ void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
+ Span<VkBufferCopy> regions) const noexcept {
dld->vkCmdCopyBuffer(handle, src_buffer, dst_buffer, regions.size(), regions.data());
}
@@ -913,8 +966,8 @@ public:
regions.data());
}
- void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, u32 data) const
- noexcept {
+ void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size,
+ u32 data) const noexcept {
dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data);
}
@@ -968,6 +1021,50 @@ public:
buffer_barriers.data(), image_barriers.size(), image_barriers.data());
}
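+    // VK_EXT_extended_dynamic_state commands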
+ void BindVertexBuffers2EXT(u32 first_binding, u32 binding_count, const VkBuffer* buffers,
+ const VkDeviceSize* offsets, const VkDeviceSize* sizes,
+ const VkDeviceSize* strides) const noexcept {
+ dld->vkCmdBindVertexBuffers2EXT(handle, first_binding, binding_count, buffers, offsets,
+ sizes, strides);
+ }
+
+ void SetCullModeEXT(VkCullModeFlags cull_mode) const noexcept {
+ dld->vkCmdSetCullModeEXT(handle, cull_mode);
+ }
+
+ void SetDepthBoundsTestEnableEXT(bool enable) const noexcept {
+ dld->vkCmdSetDepthBoundsTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
+ }
+
+ void SetDepthCompareOpEXT(VkCompareOp compare_op) const noexcept {
+ dld->vkCmdSetDepthCompareOpEXT(handle, compare_op);
+ }
+
+ void SetDepthTestEnableEXT(bool enable) const noexcept {
+ dld->vkCmdSetDepthTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
+ }
+
+ void SetDepthWriteEnableEXT(bool enable) const noexcept {
+ dld->vkCmdSetDepthWriteEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
+ }
+
+ void SetFrontFaceEXT(VkFrontFace front_face) const noexcept {
+ dld->vkCmdSetFrontFaceEXT(handle, front_face);
+ }
+
+ void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept {
+ dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology);
+ }
+
+ void SetStencilOpEXT(VkStencilFaceFlags face_mask, VkStencilOp fail_op, VkStencilOp pass_op,
+ VkStencilOp depth_fail_op, VkCompareOp compare_op) const noexcept {
+ dld->vkCmdSetStencilOpEXT(handle, face_mask, fail_op, pass_op, depth_fail_op, compare_op);
+ }
+
+ void SetStencilTestEnableEXT(bool enable) const noexcept {
+ dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
+ }
+
void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
const VkDeviceSize* offsets,
const VkDeviceSize* sizes) const noexcept {
@@ -996,4 +1093,7 @@ private:
std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties(
const InstanceDispatch& dld);
+std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
+ const InstanceDispatch& dld);
+
} // namespace Vulkan::vk
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
index cca13bcde..8e5a22ab3 100644
--- a/src/video_core/shader/ast.h
+++ b/src/video_core/shader/ast.h
@@ -199,55 +199,48 @@ public:
}
std::optional<u32> GetGotoLabel() const {
- auto inner = std::get_if<ASTGoto>(&data);
- if (inner) {
+ if (const auto* inner = std::get_if<ASTGoto>(&data)) {
return {inner->label};
}
- return {};
+ return std::nullopt;
}
Expr GetGotoCondition() const {
- auto inner = std::get_if<ASTGoto>(&data);
- if (inner) {
+ if (const auto* inner = std::get_if<ASTGoto>(&data)) {
return inner->condition;
}
return nullptr;
}
void MarkLabelUnused() {
- auto inner = std::get_if<ASTLabel>(&data);
- if (inner) {
+ if (auto* inner = std::get_if<ASTLabel>(&data)) {
inner->unused = true;
}
}
bool IsLabelUnused() const {
- auto inner = std::get_if<ASTLabel>(&data);
- if (inner) {
+ if (const auto* inner = std::get_if<ASTLabel>(&data)) {
return inner->unused;
}
return true;
}
std::optional<u32> GetLabelIndex() const {
- auto inner = std::get_if<ASTLabel>(&data);
- if (inner) {
+ if (const auto* inner = std::get_if<ASTLabel>(&data)) {
return {inner->index};
}
- return {};
+ return std::nullopt;
}
Expr GetIfCondition() const {
- auto inner = std::get_if<ASTIfThen>(&data);
- if (inner) {
+ if (const auto* inner = std::get_if<ASTIfThen>(&data)) {
return inner->condition;
}
return nullptr;
}
void SetGotoCondition(Expr new_condition) {
- auto inner = std::get_if<ASTGoto>(&data);
- if (inner) {
+ if (auto* inner = std::get_if<ASTGoto>(&data)) {
inner->condition = std::move(new_condition);
}
}
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
new file mode 100644
index 000000000..aabd62c5c
--- /dev/null
+++ b/src/video_core/shader/async_shaders.cpp
@@ -0,0 +1,221 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+#include <vector>
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_base.h"
+#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/shader/async_shaders.h"
+
+namespace VideoCommon::Shader {
+
+AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {}
+
+AsyncShaders::~AsyncShaders() {
+ KillWorkers();
+}
+
+void AsyncShaders::AllocateWorkers() {
+ // Max worker threads we should allow
+ constexpr u32 MAX_THREADS = 4;
+ // Deduce how many threads we can use
+ const u32 threads_used = std::thread::hardware_concurrency() / 4;
+ // Always allow at least 1 thread regardless of our settings
+ const auto max_worker_count = std::max(1U, threads_used);
+ // Don't use more than MAX_THREADS
+ const auto num_workers = std::min(max_worker_count, MAX_THREADS);
+
+    // If we already have the desired number of workers, there is nothing to do
+ if (num_workers == worker_threads.size()) {
+ return;
+ }
+
+ // If workers already exist, clear them
+ if (!worker_threads.empty()) {
+ FreeWorkers();
+ }
+
+ // Create workers
+ for (std::size_t i = 0; i < num_workers; i++) {
+ context_list.push_back(emu_window.CreateSharedContext());
+ worker_threads.push_back(
+ std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()));
+ }
+}
+
+void AsyncShaders::FreeWorkers() {
+ // Mark all threads to quit
+ is_thread_exiting.store(true);
+ cv.notify_all();
+ for (auto& thread : worker_threads) {
+ thread.join();
+ }
+ // Clear our shared contexts
+ context_list.clear();
+
+ // Clear our worker threads
+ worker_threads.clear();
+}
+
+void AsyncShaders::KillWorkers() {
+    is_thread_exiting.store(true);
+    // Wake any workers blocked on the condition variable so they can observe the flag
+    cv.notify_all();
+ for (auto& thread : worker_threads) {
+ thread.detach();
+ }
+ // Clear our shared contexts
+ context_list.clear();
+
+ // Clear our worker threads
+ worker_threads.clear();
+}
+
+bool AsyncShaders::HasWorkQueued() const {
+ return !pending_queue.empty();
+}
+
+bool AsyncShaders::HasCompletedWork() const {
+ std::shared_lock lock{completed_mutex};
+ return !finished_work.empty();
+}
+
+bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
+ const auto& regs = gpu.Maxwell3D().regs;
+
+    // If depth is in use, assume the game is rendering a full scene rather than a
+    // one-off pass, so the shader is likely to be reused and is worth building async.
+ if (regs.zeta_enable) {
+ return true;
+ }
+
+    // A small index count usually indicates a full-screen quad. These shaders tend to
+    // be built and executed only once (e.g. to generate a texture), so they cannot be
+    // built asynchronously.
+ if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) {
+ return false;
+ }
+
+ return true;
+}
+
+std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
+ std::vector<Result> results;
+ {
+ std::unique_lock lock{completed_mutex};
+ results.assign(std::make_move_iterator(finished_work.begin()),
+ std::make_move_iterator(finished_work.end()));
+ finished_work.clear();
+ }
+ return results;
+}
+
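+// Packages an OpenGL/GLASM compile request and wakes one worker thread.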
+void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
+ Tegra::Engines::ShaderType shader_type, u64 uid,
+ std::vector<u64> code, std::vector<u64> code_b,
+ u32 main_offset,
+ VideoCommon::Shader::CompilerSettings compiler_settings,
+ const VideoCommon::Shader::Registry& registry,
+ VAddr cpu_addr) {
+ WorkerParams params{
+ .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
+ .device = &device,
+ .shader_type = shader_type,
+ .uid = uid,
+ .code = std::move(code),
+ .code_b = std::move(code_b),
+ .main_offset = main_offset,
+ .compiler_settings = compiler_settings,
+ .registry = registry,
+ .cpu_address = cpu_addr,
+ };
+ std::unique_lock lock(queue_mutex);
+ pending_queue.push(std::move(params));
+ cv.notify_one();
+}
+
+void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
+ const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
+ Vulkan::VKDescriptorPool& descriptor_pool,
+ Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+ Vulkan::VKRenderPassCache& renderpass_cache,
+ std::vector<VkDescriptorSetLayoutBinding> bindings,
+ Vulkan::SPIRVProgram program,
+ Vulkan::GraphicsPipelineCacheKey key) {
+ WorkerParams params{
+ .backend = Backend::Vulkan,
+ .pp_cache = pp_cache,
+ .vk_device = &device,
+ .scheduler = &scheduler,
+ .descriptor_pool = &descriptor_pool,
+ .update_descriptor_queue = &update_descriptor_queue,
+ .renderpass_cache = &renderpass_cache,
+ .bindings = bindings,
+ .program = program,
+ .key = key,
+ };
+
+ std::unique_lock lock(queue_mutex);
+ pending_queue.push(std::move(params));
+ cv.notify_one();
+}
+
+void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
+ while (!is_thread_exiting.load(std::memory_order_relaxed)) {
+ std::unique_lock lock{queue_mutex};
+ cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
+ if (is_thread_exiting) {
+ return;
+ }
+
+        // Guard against spurious wake-ups: another thread may have already drained the queue
+        if (pending_queue.empty()) {
+            continue;
+        }
+
+ // Pull work from queue
+ WorkerParams work = std::move(pending_queue.front());
+ pending_queue.pop();
+ lock.unlock();
+
+ if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
+ const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
+ const auto scope = context->Acquire();
+ auto program =
+ OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
+ Result result{};
+ result.backend = work.backend;
+ result.cpu_address = work.cpu_address;
+ result.uid = work.uid;
+ result.code = std::move(work.code);
+ result.code_b = std::move(work.code_b);
+ result.shader_type = work.shader_type;
+
+ if (work.backend == Backend::OpenGL) {
+ result.program.opengl = std::move(program->source_program);
+ } else if (work.backend == Backend::GLASM) {
+ result.program.glasm = std::move(program->assembly_program);
+ }
+
+ {
+ std::unique_lock complete_lock(completed_mutex);
+ finished_work.push_back(std::move(result));
+ }
+ } else if (work.backend == Backend::Vulkan) {
+ auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
+ *work.vk_device, *work.scheduler, *work.descriptor_pool,
+ *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
+ work.program);
+
+ work.pp_cache->EmplacePipeline(std::move(pipeline));
+ }
+ }
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
new file mode 100644
index 000000000..7a99e1dc5
--- /dev/null
+++ b/src/video_core/shader/async_shaders.h
@@ -0,0 +1,147 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <condition_variable>
+#include <memory>
+#include <optional>
+#include <queue>
+#include <shared_mutex>
+#include <thread>
+#include <vector>
+
+// This header includes both Vulkan and OpenGL headers; that has to be fixed eventually.
+// Unfortunately, including OpenGL pulls in Windows.h, which defines macros that can cause issues.
+// Forcefully include glad early and undefine those macros.
+#include <glad/glad.h>
+#ifdef CreateEvent
+#undef CreateEvent
+#endif
+#ifdef CreateSemaphore
+#undef CreateSemaphore
+#endif
+
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Core::Frontend {
+class EmuWindow;
+class GraphicsContext;
+} // namespace Core::Frontend
+
+namespace Tegra {
+class GPU;
+}
+
+namespace Vulkan {
+class VKPipelineCache;
+}
+
+namespace VideoCommon::Shader {
+
+class AsyncShaders {
+public:
+ enum class Backend {
+ OpenGL,
+ GLASM,
+ Vulkan,
+ };
+
+ struct ResultPrograms {
+ OpenGL::OGLProgram opengl;
+ OpenGL::OGLAssemblyProgram glasm;
+ };
+
+ struct Result {
+ u64 uid;
+ VAddr cpu_address;
+ Backend backend;
+ ResultPrograms program;
+ std::vector<u64> code;
+ std::vector<u64> code_b;
+ Tegra::Engines::ShaderType shader_type;
+ };
+
+ explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window);
+ ~AsyncShaders();
+
+ /// Start up shader worker threads
+ void AllocateWorkers();
+
+ /// Clear the shader queue and kill all worker threads
+ void FreeWorkers();
+
+    /// Forcibly end all worker threads without waiting for pending work to finish
+ void KillWorkers();
+
+ /// Check to see if any shaders have actually been compiled
+ [[nodiscard]] bool HasCompletedWork() const;
+
+    /// Deduce whether a shader can be built on another thread or MUST be built synchronously.
+    /// We cannot build every shader asynchronously, as some shaders are built and executed
+    /// only once; we try to "guess" which ones those are.
+ [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;
+
+ /// Pulls completed compiled shaders
+ [[nodiscard]] std::vector<Result> GetCompletedWork();
+
+ void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
+ u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
+ CompilerSettings compiler_settings, const Registry& registry,
+ VAddr cpu_addr);
+
+ void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
+ Vulkan::VKScheduler& scheduler,
+ Vulkan::VKDescriptorPool& descriptor_pool,
+ Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+ Vulkan::VKRenderPassCache& renderpass_cache,
+ std::vector<VkDescriptorSetLayoutBinding> bindings,
+ Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key);
+
+private:
+ void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
+
+ /// Check our worker queue to see if we have any work queued already
+ [[nodiscard]] bool HasWorkQueued() const;
+
+ struct WorkerParams {
+ Backend backend;
+ // For OGL
+ const OpenGL::Device* device;
+ Tegra::Engines::ShaderType shader_type;
+ u64 uid;
+ std::vector<u64> code;
+ std::vector<u64> code_b;
+ u32 main_offset;
+ CompilerSettings compiler_settings;
+ std::optional<Registry> registry;
+ VAddr cpu_address;
+
+ // For Vulkan
+ Vulkan::VKPipelineCache* pp_cache;
+ const Vulkan::VKDevice* vk_device;
+ Vulkan::VKScheduler* scheduler;
+ Vulkan::VKDescriptorPool* descriptor_pool;
+ Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
+ Vulkan::VKRenderPassCache* renderpass_cache;
+ std::vector<VkDescriptorSetLayoutBinding> bindings;
+ Vulkan::SPIRVProgram program;
+ Vulkan::GraphicsPipelineCacheKey key;
+ };
+
+ std::condition_variable cv;
+ mutable std::mutex queue_mutex;
+ mutable std::shared_mutex completed_mutex;
+ std::atomic<bool> is_thread_exiting{};
+ std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
+ std::vector<std::thread> worker_threads;
+ std::queue<WorkerParams> pending_queue;
+ std::vector<Result> finished_work;
+ Core::Frontend::EmuWindow& emu_window;
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 8d86020f6..4c8971615 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -187,24 +187,26 @@ std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state,
std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
u64 ldc_tracked_register) {
- return TrackInstruction<u64>(state, pos,
- [ldc_tracked_register](auto instr, const auto& opcode) {
- return opcode.GetId() == OpCode::Id::SHL_IMM &&
- instr.gpr0.Value() == ldc_tracked_register;
- },
- [](auto instr, const auto&) { return instr.gpr8.Value(); });
+ return TrackInstruction<u64>(
+ state, pos,
+ [ldc_tracked_register](auto instr, const auto& opcode) {
+ return opcode.GetId() == OpCode::Id::SHL_IMM &&
+ instr.gpr0.Value() == ldc_tracked_register;
+ },
+ [](auto instr, const auto&) { return instr.gpr8.Value(); });
}
std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
u64 shl_tracked_register) {
- return TrackInstruction<u32>(state, pos,
- [shl_tracked_register](auto instr, const auto& opcode) {
- return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
- instr.gpr0.Value() == shl_tracked_register;
- },
- [](auto instr, const auto&) {
- return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
- });
+ return TrackInstruction<u32>(
+ state, pos,
+ [shl_tracked_register](auto instr, const auto& opcode) {
+ return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
+ instr.gpr0.Value() == shl_tracked_register;
+ },
+ [](auto instr, const auto&) {
+ return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
+ });
}
std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
@@ -545,13 +547,13 @@ bool TryQuery(CFGRebuildState& state) {
gather_labels(q2.ssy_stack, state.ssy_labels, block);
gather_labels(q2.pbk_stack, state.pbk_labels, block);
if (std::holds_alternative<SingleBranch>(*block.branch)) {
- const auto branch = std::get_if<SingleBranch>(block.branch.get());
+ auto* branch = std::get_if<SingleBranch>(block.branch.get());
if (!branch->condition.IsUnconditional()) {
q2.address = block.end + 1;
state.queries.push_back(q2);
}
- Query conditional_query{q2};
+ auto& conditional_query = state.queries.emplace_back(q2);
if (branch->is_sync) {
if (branch->address == unassigned_branch) {
branch->address = conditional_query.ssy_stack.top();
@@ -565,21 +567,21 @@ bool TryQuery(CFGRebuildState& state) {
conditional_query.pbk_stack.pop();
}
conditional_query.address = branch->address;
- state.queries.push_back(std::move(conditional_query));
return true;
}
- const auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
+
+ const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get());
for (const auto& branch_case : multi_branch->branches) {
- Query conditional_query{q2};
+ auto& conditional_query = state.queries.emplace_back(q2);
conditional_query.address = branch_case.address;
- state.queries.push_back(std::move(conditional_query));
}
+
return true;
}
void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
- const auto get_expr = ([&](const Condition& cond) -> Expr {
- Expr result{};
+ const auto get_expr = [](const Condition& cond) -> Expr {
+ Expr result;
if (cond.cc != ConditionCode::T) {
result = MakeExpr<ExprCondCode>(cond.cc);
}
@@ -592,10 +594,10 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
}
Expr extra = MakeExpr<ExprPredicate>(pred);
if (negate) {
- extra = MakeExpr<ExprNot>(extra);
+ extra = MakeExpr<ExprNot>(std::move(extra));
}
if (result) {
- return MakeExpr<ExprAnd>(extra, result);
+ return MakeExpr<ExprAnd>(std::move(extra), std::move(result));
}
return extra;
}
@@ -603,9 +605,10 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
return result;
}
return MakeExpr<ExprBoolean>(true);
- });
+ };
+
if (std::holds_alternative<SingleBranch>(*branch_info)) {
- const auto branch = std::get_if<SingleBranch>(branch_info.get());
+ const auto* branch = std::get_if<SingleBranch>(branch_info.get());
if (branch->address < 0) {
if (branch->kill) {
mm.InsertReturn(get_expr(branch->condition), true);
@@ -617,7 +620,7 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
mm.InsertGoto(get_expr(branch->condition), branch->address);
return;
}
- const auto multi_branch = std::get_if<MultiBranch>(branch_info.get());
+ const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get());
for (const auto& branch_case : multi_branch->branches) {
mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
branch_case.address);
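
TrackSHLRegister and TrackIMNMXValue above walk the code backwards from an indirect branch to recover its jump table: the SHL_IMM source register gives the table index, and the IMNMX_IMM immediate plus one bounds the entry count. Both are thin wrappers over TrackInstruction; a self-contained analogue of that backward-scan shape, written under no assumptions about the real template:

    #include <cstddef>
    #include <optional>
    #include <vector>

    // Scans positions [0, pos) from high to low; maps the first match of pred.
    template <typename Result, typename T, typename Pred, typename Map>
    std::optional<Result> TrackBackwards(const std::vector<T>& code, std::size_t pos,
                                         Pred&& pred, Map&& map) {
        while (pos-- > 0) {
            if (pred(code[pos])) {
                return map(code[pos]);
            }
        }
        return std::nullopt;
    }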
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index a276aee44..88103fede 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -53,6 +53,9 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
absolute_a = ((instr.value >> 44) & 1) != 0;
absolute_b = ((instr.value >> 54) & 1) != 0;
break;
+ default:
+ UNREACHABLE();
+ break;
}
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index a041519b7..73155966f 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -98,12 +98,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
- const Node value = [&]() {
- const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
+ const Node value = [&] {
+ Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
}
- const Node shifted = [&]() {
+ const Node shifted = [&] {
switch (instr.iadd3.mode) {
case Tegra::Shader::IAdd3Mode::RightShift:
// TODO(tech4me): According to
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index 73880db0e..2a30aab2b 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -28,23 +28,26 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
case OpCode::Id::IADD32I: {
UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
- op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true);
+ op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true);
- const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
+ Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b));
- SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
- SetRegister(bb, instr.gpr0, value);
+ SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0);
+ SetRegister(bb, instr.gpr0, std::move(value));
break;
}
case OpCode::Id::LOP32I: {
- if (instr.alu.lop32i.invert_a)
- op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
+ if (instr.alu.lop32i.invert_a) {
+ op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a));
+ }
- if (instr.alu.lop32i.invert_b)
- op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
+ if (instr.alu.lop32i.invert_b) {
+ op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
+ }
- WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
- PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc);
+ WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a),
+ std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex,
+ instr.op_32.generates_cc != 0);
break;
}
default:
@@ -58,14 +61,14 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
Node op_b, PredicateResultMode predicate_mode, Pred predicate,
bool sets_cc) {
- const Node result = [&]() {
+ Node result = [&] {
switch (logic_op) {
case LogicOperation::And:
- return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b);
+ return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b));
case LogicOperation::Or:
- return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b);
+ return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b));
case LogicOperation::Xor:
- return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b);
+ return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b));
case LogicOperation::PassB:
return op_b;
default:
@@ -84,8 +87,8 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation
return;
case PredicateResultMode::NotZero: {
// Set the predicate to true if the result is not zero.
- const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0));
- SetPredicate(bb, static_cast<u64>(predicate), compare);
+ Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0));
+ SetPredicate(bb, static_cast<u64>(predicate), std::move(compare));
break;
}
default:
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 848e46874..b2e88fa20 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -13,55 +13,101 @@
namespace VideoCommon::Shader {
+using std::move;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
+using Tegra::Shader::PredCondition;
u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- if (instr.hset2.ftz == 0) {
- LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+ PredCondition cond;
+ bool bf;
+ bool ftz;
+ bool neg_a;
+ bool abs_a;
+ bool neg_b;
+ bool abs_b;
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::HSET2_C:
+ case OpCode::Id::HSET2_IMM:
+ cond = instr.hsetp2.cbuf_and_imm.cond;
+ bf = instr.Bit(53);
+ ftz = instr.Bit(54);
+ neg_a = instr.Bit(43);
+ abs_a = instr.Bit(44);
+ neg_b = instr.Bit(56);
+ abs_b = instr.Bit(54);
+ break;
+ case OpCode::Id::HSET2_R:
+ cond = instr.hsetp2.reg.cond;
+ bf = instr.Bit(49);
+ ftz = instr.Bit(50);
+ neg_a = instr.Bit(43);
+ abs_a = instr.Bit(44);
+ neg_b = instr.Bit(31);
+ abs_b = instr.Bit(30);
+ break;
+ default:
+ UNREACHABLE();
}
- Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
- op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
-
- Node op_b = [&]() {
+ Node op_b = [this, instr, opcode] {
switch (opcode->get().GetId()) {
+ case OpCode::Id::HSET2_C:
+ // Report as unimplemented, as this path is untested.
+ UNIMPLEMENTED_MSG("HSET2_C is not implemented");
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
case OpCode::Id::HSET2_R:
return GetRegister(instr.gpr20);
+ case OpCode::Id::HSET2_IMM:
+ return UnpackHalfImmediate(instr, true);
default:
UNREACHABLE();
- return Immediate(0);
+ return Node{};
}
}();
- op_b = UnpackHalfFloat(op_b, instr.hset2.type_b);
- op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);
- const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
+ if (!ftz) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+ }
+
+ Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
+ op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
+
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::HSET2_R:
+ op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
+ [[fallthrough]];
+ case OpCode::Id::HSET2_C:
+ op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
+ break;
+ default:
+ break;
+ }
- const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b);
+ Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
+
+ Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
// HSET2 operates on each half float in the pack.
std::array<Node, 2> values;
for (u32 i = 0; i < 2; ++i) {
- const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff;
- const Node true_value = Immediate(raw_value << (i * 16));
- const Node false_value = Immediate(0);
-
- const Node comparison =
- Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
- const Node predicate = Operation(combiner, comparison, second_pred);
+ const u32 raw_value = bf ? 0x3c00 : 0xffff;
+ Node true_value = Immediate(raw_value << (i * 16));
+ Node false_value = Immediate(0);
+ Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
+ Node predicate = Operation(combiner, comparison, second_pred);
values[i] =
- Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value);
+ Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
}
- const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]);
- SetRegister(bb, instr.gpr0, value);
+ Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
+ SetRegister(bb, instr.gpr0, move(value));
return pc;
}
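
The rewritten loop above builds the packed HSET2 result one 16-bit half at a time: a passing comparison selects 0x3c00 (1.0 in FP16) when bf is set, or an all-ones 0xffff mask otherwise, shifted into bits [16*i, 16*i + 15] and OR-ed together. The same packing, worked standalone:

    #include <cstdint>

    // h0/h1 are the per-half comparison outcomes; mirrors raw_value << (i * 16).
    constexpr std::uint32_t PackHset2Result(bool h0, bool h1, bool bf) {
        const std::uint32_t raw = bf ? 0x3c00 : 0xffff;
        return (h0 ? raw : 0u) | ((h1 ? raw : 0u) << 16);
    }
    static_assert(PackHset2Result(true, false, true) == 0x00003c00);
    static_assert(PackHset2Result(true, true, false) == 0xffffffff);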
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 60b6ad72a..618d309d2 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -31,11 +31,11 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
std::size_t component) {
const TextureFormat format{descriptor.format};
switch (format) {
- case TextureFormat::R16_G16_B16_A16:
- case TextureFormat::R32_G32_B32_A32:
- case TextureFormat::R32_G32_B32:
- case TextureFormat::R32_G32:
- case TextureFormat::R16_G16:
+ case TextureFormat::R16G16B16A16:
+ case TextureFormat::R32G32B32A32:
+ case TextureFormat::R32G32B32:
+ case TextureFormat::R32G32:
+ case TextureFormat::R16G16:
case TextureFormat::R32:
case TextureFormat::R16:
case TextureFormat::R8:
@@ -97,6 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
break;
case TextureFormat::B5G6R5:
case TextureFormat::B6G5R5:
+ case TextureFormat::B10G11R11:
if (component == 0) {
return descriptor.b_type;
}
@@ -107,9 +108,9 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
return descriptor.r_type;
}
break;
- case TextureFormat::G8R24:
- case TextureFormat::G24R8:
- case TextureFormat::G8R8:
+ case TextureFormat::R24G8:
+ case TextureFormat::R8G24:
+ case TextureFormat::R8G8:
case TextureFormat::G4R4:
if (component == 0) {
return descriptor.g_type;
@@ -118,8 +119,10 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
return descriptor.r_type;
}
break;
+ default:
+ break;
}
- UNIMPLEMENTED_MSG("texture format not implement={}", format);
+ UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
return ComponentType::FLOAT;
}
@@ -136,15 +139,15 @@ bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
u32 GetComponentSize(TextureFormat format, std::size_t component) {
switch (format) {
- case TextureFormat::R32_G32_B32_A32:
+ case TextureFormat::R32G32B32A32:
return 32;
- case TextureFormat::R16_G16_B16_A16:
+ case TextureFormat::R16G16B16A16:
return 16;
- case TextureFormat::R32_G32_B32:
+ case TextureFormat::R32G32B32:
return component <= 2 ? 32 : 0;
- case TextureFormat::R32_G32:
+ case TextureFormat::R32G32:
return component <= 1 ? 32 : 0;
- case TextureFormat::R16_G16:
+ case TextureFormat::R16G16:
return component <= 1 ? 16 : 0;
case TextureFormat::R32:
return component == 0 ? 32 : 0;
@@ -191,7 +194,15 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
return 6;
}
return 0;
- case TextureFormat::G8R24:
+ case TextureFormat::B10G11R11:
+ if (component == 1 || component == 2) {
+ return 11;
+ }
+ if (component == 0) {
+ return 10;
+ }
+ return 0;
+ case TextureFormat::R24G8:
if (component == 0) {
return 8;
}
@@ -199,7 +210,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
return 24;
}
return 0;
- case TextureFormat::G24R8:
+ case TextureFormat::R8G24:
if (component == 0) {
return 8;
}
@@ -207,12 +218,12 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
return 24;
}
return 0;
- case TextureFormat::G8R8:
+ case TextureFormat::R8G8:
return (component == 0 || component == 1) ? 8 : 0;
case TextureFormat::G4R4:
return (component == 0 || component == 1) ? 4 : 0;
default:
- UNIMPLEMENTED_MSG("texture format not implement={}", format);
+ UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
return 0;
}
}
@@ -223,24 +234,25 @@ std::size_t GetImageComponentMask(TextureFormat format) {
constexpr u8 B = 0b0100;
constexpr u8 A = 0b1000;
switch (format) {
- case TextureFormat::R32_G32_B32_A32:
- case TextureFormat::R16_G16_B16_A16:
+ case TextureFormat::R32G32B32A32:
+ case TextureFormat::R16G16B16A16:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::A4B4G4R4:
case TextureFormat::A5B5G5R1:
case TextureFormat::A1B5G5R5:
return std::size_t{R | G | B | A};
- case TextureFormat::R32_G32_B32:
+ case TextureFormat::R32G32B32:
case TextureFormat::R32_B24G8:
case TextureFormat::B5G6R5:
case TextureFormat::B6G5R5:
+ case TextureFormat::B10G11R11:
return std::size_t{R | G | B};
- case TextureFormat::R32_G32:
- case TextureFormat::R16_G16:
- case TextureFormat::G8R24:
- case TextureFormat::G24R8:
- case TextureFormat::G8R8:
+ case TextureFormat::R32G32:
+ case TextureFormat::R16G16:
+ case TextureFormat::R24G8:
+ case TextureFormat::R8G24:
+ case TextureFormat::R8G8:
case TextureFormat::G4R4:
return std::size_t{R | G};
case TextureFormat::R32:
@@ -249,7 +261,7 @@ std::size_t GetImageComponentMask(TextureFormat format) {
case TextureFormat::R1:
return std::size_t{R};
default:
- UNIMPLEMENTED_MSG("texture format not implement={}", format);
+ UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
return std::size_t{R | G | B | A};
}
}
@@ -299,7 +311,7 @@ std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type,
return {std::move(original_value), true};
}
default:
- UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
+ UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
return {std::move(original_value), true};
}
}
@@ -455,11 +467,14 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
return OperationCode::AtomicImageXor;
case Tegra::Shader::ImageAtomicOperation::Exch:
return OperationCode::AtomicImageExchange;
+ default:
+ break;
}
+ break;
default:
break;
}
- UNIMPLEMENTED_MSG("Unimplemented operation={} type={}",
+ UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
static_cast<u64>(instr.suatom_d.operation.Value()),
static_cast<u64>(instr.suatom_d.operation_type.Value()));
return OperationCode::AtomicImageAdd;
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 63adbc4a3..e2bba88dd 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -386,7 +386,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::RED: {
- UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
+ UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
+ static_cast<int>(instr.red.type.Value()));
const auto [real_address, base_address, descriptor] =
TrackGlobalMemory(bb, instr, true, true);
if (!real_address || !base_address) {
@@ -471,9 +472,9 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
const auto [base_address, index, offset] =
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
- ASSERT_OR_EXECUTE_MSG(base_address != nullptr,
- { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
- "Global memory tracking failed");
+ ASSERT_OR_EXECUTE_MSG(
+ base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
+ "Global memory tracking failed");
bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index c0a8f233f..29a7cfbfe 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -75,8 +75,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
const Node value = [this, instr] {
switch (instr.sys20) {
case SystemVariable::LaneId:
- LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete");
- return Immediate(0U);
+ return Operation(OperationCode::ThreadId);
case SystemVariable::InvocationId:
return Operation(OperationCode::InvocationId);
case SystemVariable::Ydirection:
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 8f0bb996e..a03b50e39 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
return pc;
}
-ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
- std::optional<u32> buffer) {
+ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
+ SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
if (info.IsComplete()) {
return info;
}
- const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
- : registry.ObtainBoundSampler(offset);
if (!sampler) {
LOG_WARNING(HW_GPU, "Unknown sampler info");
info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
@@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
SamplerInfo sampler_info) {
- const auto offset = static_cast<u32>(sampler.index.Value());
- const auto info = GetSamplerInfo(sampler_info, offset);
+ const u32 offset = static_cast<u32>(sampler.index.Value());
+ const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
// If this sampler has already been used, return the existing mapping.
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
@@ -404,20 +402,19 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
const Node sampler_register = GetRegister(reg);
const auto [base_node, tracked_sampler_info] =
TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
- ASSERT(base_node != nullptr);
- if (base_node == nullptr) {
+ if (!base_node) {
+ UNREACHABLE();
return std::nullopt;
}
- if (const auto bindless_sampler_info =
- std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
- const u32 buffer = bindless_sampler_info->GetIndex();
- const u32 offset = bindless_sampler_info->GetOffset();
- info = GetSamplerInfo(info, offset, buffer);
+ if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
+ const u32 buffer = sampler_info->index;
+ const u32 offset = sampler_info->offset;
+ info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
// If this sampler has already been used, return the existing mapping.
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
- [buffer = buffer, offset = offset](const Sampler& entry) {
+ [buffer, offset](const Sampler& entry) {
return entry.buffer == buffer && entry.offset == offset;
});
if (it != used_samplers.end()) {
@@ -431,10 +428,32 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
*info.is_shadow, *info.is_buffer, false);
}
- if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
- const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
- index_var = GetCustomVariable(array_sampler_info->GetIndexVar());
- info = GetSamplerInfo(info, base_offset);
+ if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
+ const std::pair indices = sampler_info->indices;
+ const std::pair offsets = sampler_info->offsets;
+ info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
+
+ // Try to use an already created sampler if it exists
+ const auto it = std::find_if(
+ used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
+ return offsets == std::pair{entry.offset, entry.secondary_offset} &&
+ indices == std::pair{entry.buffer, entry.secondary_buffer};
+ });
+ if (it != used_samplers.end()) {
+ ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
+ it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
+ return *it;
+ }
+
+ // Otherwise create a new mapping for this sampler
+ const u32 next_index = static_cast<u32>(used_samplers.size());
+ return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
+ *info.is_shadow, *info.is_buffer);
+ }
+ if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
+ const u32 base_offset = sampler_info->base_offset / 4;
+ index_var = GetCustomVariable(sampler_info->bindless_var);
+ info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
// If this sampler has already been used, return the existing mapping.
const auto it = std::find_if(
@@ -744,7 +763,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
const auto texture_type{instr.tld.texture_type};
- const bool is_array{instr.tld.is_array};
+ const bool is_array{instr.tld.is_array != 0};
const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
const std::size_t coord_count{GetCoordCount(texture_type)};
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index 64ba60ea2..1c0957277 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -91,29 +91,28 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
return pc;
}
-Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
- Tegra::Shader::VideoType type, u64 byte_height) {
+Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
+ u64 byte_height) {
if (!is_chunk) {
return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
}
- const Node zero = Immediate(0);
switch (type) {
- case Tegra::Shader::VideoType::Size16_Low:
+ case VideoType::Size16_Low:
return BitfieldExtract(op, 0, 16);
- case Tegra::Shader::VideoType::Size16_High:
+ case VideoType::Size16_High:
return BitfieldExtract(op, 16, 16);
- case Tegra::Shader::VideoType::Size32:
+ case VideoType::Size32:
// TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
// (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
UNIMPLEMENTED();
- return zero;
- case Tegra::Shader::VideoType::Invalid:
+ return Immediate(0);
+ case VideoType::Invalid:
UNREACHABLE_MSG("Invalid instruction encoding");
- return zero;
+ return Immediate(0);
default:
UNREACHABLE();
- return zero;
+ return Immediate(0);
}
}
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index c83dc6615..233b8fa42 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -81,20 +81,21 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
SetTemporary(bb, 0, product);
product = GetTemporary(0);
- const Node original_c = op_c;
+ Node original_c = op_c;
const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
- op_c = [&]() {
+ op_c = [&] {
switch (set_mode) {
case Tegra::Shader::XmadMode::None:
return original_c;
case Tegra::Shader::XmadMode::CLo:
- return BitfieldExtract(original_c, 0, 16);
+ return BitfieldExtract(std::move(original_c), 0, 16);
case Tegra::Shader::XmadMode::CHi:
- return BitfieldExtract(original_c, 16, 16);
+ return BitfieldExtract(std::move(original_c), 16, 16);
case Tegra::Shader::XmadMode::CBcc: {
- const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
- original_b, Immediate(16));
- return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b);
+ Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
+ original_b, Immediate(16));
+ return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
+ std::move(shifted_b));
}
case Tegra::Shader::XmadMode::CSfu: {
const Node comp_a =
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
index 074f21691..e18ccba8e 100644
--- a/src/video_core/shader/memory_util.cpp
+++ b/src/video_core/shader/memory_util.cpp
@@ -16,11 +16,10 @@
namespace VideoCommon::Shader {
-GPUVAddr GetShaderAddress(Core::System& system,
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
- const auto& gpu{system.GPU().Maxwell3D()};
- const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
- return gpu.regs.code_address.CodeAddress() + shader_config.offset;
+ const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]};
+ return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset;
}
bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
@@ -66,12 +65,12 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_add
u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
const ProgramCode& code_b) {
- u64 unique_identifier = boost::hash_value(code);
+ size_t unique_identifier = boost::hash_value(code);
if (is_a) {
// VertexA programs include two programs
boost::hash_combine(unique_identifier, boost::hash_value(code_b));
}
- return unique_identifier;
+ return static_cast<u64>(unique_identifier);
}
} // namespace VideoCommon::Shader
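
GetUniqueIdentifier above hashes the whole program with boost::hash_value and, for VertexA stages, folds the second program in with boost::hash_combine before widening back to u64. A sketch of that shape, assuming Boost.ContainerHash is available:

    #include <cstdint>
    #include <vector>
    #include <boost/container_hash/hash.hpp>

    std::uint64_t UniqueId(const std::vector<std::uint64_t>& code,
                           const std::vector<std::uint64_t>* code_b) {
        std::size_t id = boost::hash_value(code); // hashes every element
        if (code_b != nullptr) {
            boost::hash_combine(id, boost::hash_value(*code_b)); // VertexA pairs two programs
        }
        return static_cast<std::uint64_t>(id);
    }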
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h
index be90d24fd..4624d38e6 100644
--- a/src/video_core/shader/memory_util.h
+++ b/src/video_core/shader/memory_util.h
@@ -11,10 +11,6 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
-namespace Core {
-class System;
-}
-
namespace Tegra {
class MemoryManager;
}
@@ -27,7 +23,7 @@ constexpr u32 STAGE_MAIN_OFFSET = 10;
constexpr u32 KERNEL_MAIN_OFFSET = 0;
/// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Core::System& system,
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
/// Gets if the current instruction offset is a scheduler instruction
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index c5e5165ff..8f230d57a 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -275,10 +275,11 @@ using Node = std::shared_ptr<NodeData>;
using Node4 = std::array<Node, 4>;
using NodeBlock = std::vector<Node>;
-class BindlessSamplerNode;
-class ArraySamplerNode;
+struct ArraySamplerNode;
+struct BindlessSamplerNode;
+struct SeparateSamplerNode;
-using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>;
+using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
using TrackSampler = std::shared_ptr<TrackSamplerData>;
struct Sampler {
@@ -288,63 +289,51 @@ struct Sampler {
: index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
is_buffer{is_buffer}, is_indexed{is_indexed} {}
+ /// Separate sampler constructor
+ constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
+ Tegra::Shader::TextureType type, bool is_array, bool is_shadow,
+ bool is_buffer)
+ : index{index}, offset{offsets.first}, secondary_offset{offsets.second},
+ buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array},
+ is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {}
+
/// Bindless samplers constructor
constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
: index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
- u32 index = 0; ///< Emulated index given for the this sampler.
- u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
- u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
- u32 size = 1; ///< Size of the sampler.
+ u32 index = 0; ///< Emulated index given for this sampler.
+ u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
+ u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
+ u32 buffer = 0; ///< Buffer where the bindless sampler is read.
+ u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
+ u32 size = 1; ///< Size of the sampler.
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
- bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
- bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
- bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
- bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
- bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
+ bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
+ bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
+ bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
+ bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
+ bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
+ bool is_separated = false; ///< Whether the image and sampler is separated or not.
};
/// Represents a tracked bindless sampler into a direct const buffer
-class ArraySamplerNode final {
-public:
- explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
- : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
-
- constexpr u32 GetIndex() const {
- return index;
- }
-
- constexpr u32 GetBaseOffset() const {
- return base_offset;
- }
-
- constexpr u32 GetIndexVar() const {
- return bindless_var;
- }
-
-private:
+struct ArraySamplerNode {
u32 index;
u32 base_offset;
u32 bindless_var;
};
-/// Represents a tracked bindless sampler into a direct const buffer
-class BindlessSamplerNode final {
-public:
- explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
-
- constexpr u32 GetIndex() const {
- return index;
- }
-
- constexpr u32 GetOffset() const {
- return offset;
- }
+/// Represents a tracked separate sampler image pair that was folded statically
+struct SeparateSamplerNode {
+ std::pair<u32, u32> indices;
+ std::pair<u32, u32> offsets;
+};
-private:
+/// Represents a tracked bindless sampler into a direct const buffer
+struct BindlessSamplerNode {
u32 index;
u32 offset;
};
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 11231bbea..1e0886185 100644
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) {
template <typename T, typename... Args>
TrackSampler MakeTrackSampler(Args&&... args) {
static_assert(std::is_convertible_v<T, TrackSamplerData>);
- return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...));
+ return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
}
template <typename... Args>
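
The brace switch above is forced by the node.h change earlier in this diff: ArraySamplerNode, SeparateSamplerNode, and BindlessSamplerNode are now plain aggregates with no user-declared constructors, so T(args...) no longer compiles while T{args...} performs aggregate initialization. A reduced illustration:

    #include <memory>
    #include <variant>

    struct BindlessSamplerNode {
        unsigned index;
        unsigned offset;
    };
    using Data = std::variant<BindlessSamplerNode>;

    // BindlessSamplerNode(0, 0x20) would fail to compile; braces work:
    const auto track = std::make_shared<Data>(BindlessSamplerNode{0, 0x20});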
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
index af70b3f35..148d91fcb 100644
--- a/src/video_core/shader/registry.cpp
+++ b/src/video_core/shader/registry.cpp
@@ -24,44 +24,45 @@ GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterfac
if (shader_stage == ShaderType::Compute) {
return {};
}
- auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
-
- GraphicsInfo info;
- info.tfb_layouts = graphics.regs.tfb_layouts;
- info.tfb_varying_locs = graphics.regs.tfb_varying_locs;
- info.primitive_topology = graphics.regs.draw.topology;
- info.tessellation_primitive = graphics.regs.tess_mode.prim;
- info.tessellation_spacing = graphics.regs.tess_mode.spacing;
- info.tfb_enabled = graphics.regs.tfb_enabled;
- info.tessellation_clockwise = graphics.regs.tess_mode.cw;
- return info;
+
+ auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine);
+
+ return {
+ .tfb_layouts = graphics.regs.tfb_layouts,
+ .tfb_varying_locs = graphics.regs.tfb_varying_locs,
+ .primitive_topology = graphics.regs.draw.topology,
+ .tessellation_primitive = graphics.regs.tess_mode.prim,
+ .tessellation_spacing = graphics.regs.tess_mode.spacing,
+ .tfb_enabled = graphics.regs.tfb_enabled != 0,
+ .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0,
+ };
}
ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
if (shader_stage != ShaderType::Compute) {
return {};
}
- auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
+
+ auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine);
const auto& launch = compute.launch_description;
- ComputeInfo info;
- info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
- info.local_memory_size_in_words = launch.local_pos_alloc;
- info.shared_memory_size_in_words = launch.shared_alloc;
- return info;
+ return {
+ .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z},
+ .shared_memory_size_in_words = launch.shared_alloc,
+ .local_memory_size_in_words = launch.local_pos_alloc,
+ };
}
} // Anonymous namespace
-Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
+Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info)
: stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
-Registry::Registry(Tegra::Engines::ShaderType shader_stage,
- Tegra::Engines::ConstBufferEngineInterface& engine)
- : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
- graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
- shader_stage, engine)} {}
+Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_)
+ : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()},
+ graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo(
+ shader_stage, engine_)} {}
Registry::~Registry() = default;
@@ -93,8 +94,27 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
return value;
}
-std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
- u32 offset) {
+std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
+ std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
+ SeparateSamplerKey key;
+ key.buffers = buffers;
+ key.offsets = offsets;
+ const auto iter = separate_samplers.find(key);
+ if (iter != separate_samplers.end()) {
+ return iter->second;
+ }
+ if (!engine) {
+ return std::nullopt;
+ }
+
+ const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
+ const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
+ const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
+ separate_samplers.emplace(key, value);
+ return value;
+}
+
+std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) {
const std::pair key = {buffer, offset};
const auto iter = bindless_samplers.find(key);
if (iter != bindless_samplers.end()) {
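
ObtainSeparateSampler resolves a statically folded texture/sampler pair: one 32-bit word is read per const-buffer location, the two words are OR-ed into a single handle for AccessSampler, and the resulting descriptor is memoized per key. A usage sketch with illustrative buffer and offset values:

    // Inside code that already has a Registry instance; the values come from a
    // SeparateSamplerNode in practice and are made up here.
    const std::pair<u32, u32> buffers{0, 0};       // const buffer index of each handle
    const std::pair<u32, u32> offsets{0x30, 0x34}; // word offset of each handle
    if (const auto descriptor = registry.ObtainSeparateSampler(buffers, offsets)) {
        // descriptor was built from AccessSampler(handle_1 | handle_2)
    }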
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
index 0c80d35fd..4bebefdde 100644
--- a/src/video_core/shader/registry.h
+++ b/src/video_core/shader/registry.h
@@ -19,8 +19,39 @@
namespace VideoCommon::Shader {
+struct SeparateSamplerKey {
+ std::pair<u32, u32> buffers;
+ std::pair<u32, u32> offsets;
+};
+
+} // namespace VideoCommon::Shader
+
+namespace std {
+
+template <>
+struct hash<VideoCommon::Shader::SeparateSamplerKey> {
+ std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
+ return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
+ key.offsets.second);
+ }
+};
+
+template <>
+struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
+ bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
+ const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
+ return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
+ }
+};
+
+} // namespace std
+
+namespace VideoCommon::Shader {
+
using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
+using SeparateSamplerMap =
+ std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
using BindlessSamplerMap =
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
@@ -63,7 +94,7 @@ public:
explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
explicit Registry(Tegra::Engines::ShaderType shader_stage,
- Tegra::Engines::ConstBufferEngineInterface& engine);
+ Tegra::Engines::ConstBufferEngineInterface& engine_);
~Registry();
@@ -73,6 +104,9 @@ public:
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
+ std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
+ std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
+
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
/// Inserts a key.
@@ -128,6 +162,7 @@ private:
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
KeyMap keys;
BoundSamplerMap bound_samplers;
+ SeparateSamplerMap separate_samplers;
BindlessSamplerMap bindless_samplers;
u32 bound_buffer;
GraphicsInfo graphics_info;
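
The std::hash specialization above XORs the four key words, which is cheap but collides under permutation: swapping the two buffers, or the two offsets, yields the same bucket. The paired std::equal_to compares the full key, so colliding entries still resolve correctly inside the unordered_map:

    using VideoCommon::Shader::SeparateSamplerKey;
    const SeparateSamplerKey a{{1, 2}, {0, 0}};
    const SeparateSamplerKey b{{2, 1}, {0, 0}};
    // Same hash (1 ^ 2 == 2 ^ 1), but equal_to{}(a, b) is false, so the map
    // keeps both keys and lookups stay correct, just in one bucket.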
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index e322c3402..29d794b34 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -112,9 +112,9 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
}
Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
- const Node node = MakeNode<InternalFlagNode>(flag);
+ Node node = MakeNode<InternalFlagNode>(flag);
if (negated) {
- return Operation(OperationCode::LogicalNegate, node);
+ return Operation(OperationCode::LogicalNegate, std::move(node));
}
return node;
}
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 15ae152f2..3a98b2104 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -330,8 +330,8 @@ private:
OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
/// Queries the missing sampler info from the execution context.
- SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset,
- std::optional<u32> buffer = std::nullopt);
+ SamplerInfo GetSamplerInfo(SamplerInfo info,
+ std::optional<Tegra::Engines::SamplerDescriptor> sampler);
/// Accesses a texture sampler.
std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
@@ -409,8 +409,14 @@ private:
std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
- std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
- s64 cursor);
+ std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
+ s64 cursor);
+
+ std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
+ const OperationNode& operation,
+ Node gpr, Node base_offset,
+ Node tracked, const NodeBlock& code,
+ s64 cursor);
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index eb97bfd41..6be3ea92b 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -14,6 +14,7 @@
namespace VideoCommon::Shader {
namespace {
+
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
OperationCode operation_code) {
for (; cursor >= 0; --cursor) {
@@ -63,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) {
operation->SetAmendIndex(amend_index);
return true;
- } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+ }
+ if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
conditional->SetAmendIndex(amend_index);
return true;
}
@@ -72,40 +74,27 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
} // Anonymous namespace
-std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
- s64 cursor) {
+std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
+ s64 cursor) {
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
+ const u32 cbuf_index = cbuf->GetIndex();
+
// Constant buffer found, test if it's an immediate
const auto& offset = cbuf->GetOffset();
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
- auto track =
- MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
+ auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
return {tracked, track};
}
if (const auto operation = std::get_if<OperationNode>(&*offset)) {
const u32 bound_buffer = registry.GetBoundBuffer();
- if (bound_buffer != cbuf->GetIndex()) {
+ if (bound_buffer != cbuf_index) {
return {};
}
- const auto pair = DecoupleIndirectRead(*operation);
- if (!pair) {
- return {};
+ if (const std::optional pair = DecoupleIndirectRead(*operation)) {
+ auto [gpr, base_offset] = *pair;
+ return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
+ code, cursor);
}
- auto [gpr, base_offset] = *pair;
- const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
- const auto& gpu_driver = registry.AccessGuestDriverProfile();
- const u32 bindless_cv = NewCustomVariable();
- Node op =
- Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
-
- const Node cv_node = GetCustomVariable(bindless_cv);
- Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
- const std::size_t amend_index = DeclareAmend(std::move(amend_op));
- AmendNodeCv(amend_index, code[cursor]);
- // TODO Implement Bindless Index custom variable
- auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
- offset_inm->GetValue(), bindless_cv);
- return {tracked, track};
}
return {};
}
@@ -122,10 +111,23 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
return TrackBindlessSampler(source, code, new_cursor);
}
if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
- for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
- if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor);
- std::get<0>(found)) {
- // Cbuf found in operand.
+ const OperationNode& op = *operation;
+
+ const OperationCode opcode = operation->GetCode();
+ if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
+ ASSERT(op.GetOperandsCount() == 2);
+ auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
+ auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
+ if (node_a && node_b) {
+ auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
+ std::pair{offset_a, offset_b});
+ return {tracked, std::move(track)};
+ }
+ }
+ std::size_t i = op.GetOperandsCount();
+ while (i--) {
+ if (auto found = TrackBindlessSampler(op[i], code, cursor); std::get<0>(found)) {
+ // Constant buffer found in operand.
return found;
}
}
@@ -139,6 +141,26 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
return {};
}
+std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
+ const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
+ const NodeBlock& code, s64 cursor) {
+ const auto offset_imm = std::get<ImmediateNode>(*base_offset);
+ const auto& gpu_driver = registry.AccessGuestDriverProfile();
+ const u32 bindless_cv = NewCustomVariable();
+ const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
+ Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
+
+ Node cv_node = GetCustomVariable(bindless_cv);
+ Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
+ const std::size_t amend_index = DeclareAmend(std::move(amend_op));
+ AmendNodeCv(amend_index, code[cursor]);
+
+ // TODO: Implement bindless index custom variable
+ auto track =
+ MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
+ return {tracked, track};
+}
+
std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
s64 cursor) const {
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
@@ -183,12 +205,12 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code,
const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
const auto& found = result.first;
if (!found) {
- return {};
+ return std::nullopt;
}
if (const auto immediate = std::get_if<ImmediateNode>(&*found)) {
return immediate->GetValue();
}
- return {};
+ return std::nullopt;
}
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
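
The new IBitwiseOr/UBitwiseOr branch above recognizes handles assembled as texture_handle | sampler_handle: each operand is chased back to a const buffer with TrackCbuf, and the pair becomes a SeparateSamplerNode. OR-merging is lossless when the two handles occupy disjoint bitfields of the 32-bit word; an illustrative packing (not necessarily the engine's exact field layout):

    #include <cstdint>

    // Illustrative split: texture id in the low 20 bits, sampler id above.
    constexpr std::uint32_t PackHandles(std::uint32_t texture_id, std::uint32_t sampler_id) {
        return (texture_id & 0xfffff) | (sampler_id << 20);
    }
    static_assert(PackHandles(0x123, 0x5) == ((0x5u << 20) | 0x123u));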
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
new file mode 100644
index 000000000..015a789d6
--- /dev/null
+++ b/src/video_core/shader_cache.h
@@ -0,0 +1,240 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+template <class T>
+class ShaderCache {
+ static constexpr u64 PAGE_BITS = 14;
+ static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;
+
+ struct Entry {
+ VAddr addr_start;
+ VAddr addr_end;
+ T* data;
+
+ bool is_memory_marked = true;
+
+ constexpr bool Overlaps(VAddr start, VAddr end) const noexcept {
+ return start < addr_end && addr_start < end;
+ }
+ };
+
+public:
+ virtual ~ShaderCache() = default;
+
+ /// @brief Removes shaders inside a given region
+ /// @note Checks for ranges
+ /// @param addr Start address of the invalidation
+ /// @param size Number of bytes of the invalidation
+ void InvalidateRegion(VAddr addr, std::size_t size) {
+ std::scoped_lock lock{invalidation_mutex};
+ InvalidatePagesInRegion(addr, size);
+ RemovePendingShaders();
+ }
+
+ /// @brief Unmarks a memory region as cached and marks it for removal
+ /// @param addr Start address of the CPU write operation
+ /// @param size Number of bytes of the CPU write operation
+ void OnCPUWrite(VAddr addr, std::size_t size) {
+ std::lock_guard lock{invalidation_mutex};
+ InvalidatePagesInRegion(addr, size);
+ }
+
+ /// @brief Flushes delayed removal operations
+ void SyncGuestHost() {
+ std::scoped_lock lock{invalidation_mutex};
+ RemovePendingShaders();
+ }
+
+ /// @brief Tries to obtain a cached shader starting at a given address
+ /// @note Doesn't check for ranges; the given address has to be the start of the shader
+ /// @param addr Start address of the shader; the lookup is exact, not by region
+ /// @return Pointer to a valid shader, nullptr when nothing is found
+ T* TryGet(VAddr addr) const {
+ std::scoped_lock lock{lookup_mutex};
+
+ const auto it = lookup_cache.find(addr);
+ if (it == lookup_cache.end()) {
+ return nullptr;
+ }
+ return it->second->data;
+ }
+
+protected:
+ explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
+
+ /// @brief Registers a given entry in the cache
+ /// @param data Shader to store in the cache
+ /// @param addr Start address of the shader that will be registered
+ /// @param size Size in bytes of the shader
+ void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
+ std::scoped_lock lock{invalidation_mutex, lookup_mutex};
+
+ const VAddr addr_end = addr + size;
+ Entry* const entry = NewEntry(addr, addr_end, data.get());
+
+ const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
+ for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
+ invalidation_cache[page].push_back(entry);
+ }
+
+ storage.push_back(std::move(data));
+
+ rasterizer.UpdatePagesCachedCount(addr, size, 1);
+ }
+
+ /// @brief Called when a shader is going to be removed
+ /// @param shader Shader that will be removed
+ /// @pre invalidation_cache is locked
+ /// @pre lookup_mutex is locked
+ virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
+
+private:
+ /// @brief Invalidate pages in a given region
+ /// @pre invalidation_mutex is locked
+ void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
+ const VAddr addr_end = addr + size;
+ const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
+ for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
+ auto it = invalidation_cache.find(page);
+ if (it == invalidation_cache.end()) {
+ continue;
+ }
+ InvalidatePageEntries(it->second, addr, addr_end);
+ }
+ }
+
+ /// @brief Remove shaders marked for deletion
+ /// @pre invalidation_mutex is locked
+ void RemovePendingShaders() {
+ if (marked_for_removal.empty()) {
+ return;
+ }
+ // Remove duplicates
+ std::sort(marked_for_removal.begin(), marked_for_removal.end());
+ marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
+ marked_for_removal.end());
+
+ std::vector<T*> removed_shaders;
+ removed_shaders.reserve(marked_for_removal.size());
+
+ std::scoped_lock lock{lookup_mutex};
+
+ for (Entry* const entry : marked_for_removal) {
+ removed_shaders.push_back(entry->data);
+
+ const auto it = lookup_cache.find(entry->addr_start);
+ ASSERT(it != lookup_cache.end());
+ lookup_cache.erase(it);
+ }
+ marked_for_removal.clear();
+
+ if (!removed_shaders.empty()) {
+ RemoveShadersFromStorage(std::move(removed_shaders));
+ }
+ }
+
+ /// @brief Invalidates entries in a given range for the passed page
+ /// @param entries Vector of entries in the page, it will be modified on overlaps
+ /// @param addr Start address of the invalidation
+ /// @param addr_end Non-inclusive end address of the invalidation
+ /// @pre invalidation_mutex is locked
+ void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
+ std::size_t index = 0;
+ while (index < entries.size()) {
+ Entry* const entry = entries[index];
+ if (!entry->Overlaps(addr, addr_end)) {
+ ++index;
+ continue;
+ }
+
+ UnmarkMemory(entry);
+ RemoveEntryFromInvalidationCache(entry);
+ marked_for_removal.push_back(entry);
+ }
+ }
+
+ /// @brief Removes all references to an entry in the invalidation cache
+ /// @param entry Entry to remove from the invalidation cache
+ /// @pre invalidation_mutex is locked
+ void RemoveEntryFromInvalidationCache(const Entry* entry) {
+ const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
+ for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
+ const auto entries_it = invalidation_cache.find(page);
+ ASSERT(entries_it != invalidation_cache.end());
+ std::vector<Entry*>& entries = entries_it->second;
+
+ const auto entry_it = std::find(entries.begin(), entries.end(), entry);
+ ASSERT(entry_it != entries.end());
+ entries.erase(entry_it);
+ }
+ }
+
+ /// @brief Unmarks an entry from the rasterizer cache
+ /// @param entry Entry to unmark from memory
+ void UnmarkMemory(Entry* entry) {
+ if (!entry->is_memory_marked) {
+ return;
+ }
+ entry->is_memory_marked = false;
+
+ const VAddr addr = entry->addr_start;
+ const std::size_t size = entry->addr_end - addr;
+ rasterizer.UpdatePagesCachedCount(addr, size, -1);
+ }
+
+ /// @brief Removes a vector of shaders from a list
+ /// @param removed_shaders Shaders to be removed from the storage
+ /// @pre invalidation_mutex is locked
+ /// @pre lookup_mutex is locked
+ void RemoveShadersFromStorage(std::vector<T*> removed_shaders) {
+ // Notify removals
+ for (T* const shader : removed_shaders) {
+ OnShaderRemoval(shader);
+ }
+
+ // Remove them from the cache
+ const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) {
+ return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
+ removed_shaders.end();
+ };
+ std::erase_if(storage, is_removed);
+ }
+
+ /// @brief Creates a new entry in the lookup cache and returns its pointer
+ /// @pre lookup_mutex is locked
+ Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) {
+ auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
+ Entry* const entry_pointer = entry.get();
+
+ lookup_cache.emplace(addr, std::move(entry));
+ return entry_pointer;
+ }
+
+ VideoCore::RasterizerInterface& rasterizer;
+
+ mutable std::mutex lookup_mutex;
+ std::mutex invalidation_mutex;
+
+ std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
+ std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
+ std::vector<std::unique_ptr<T>> storage;
+ std::vector<Entry*> marked_for_removal;
+};
+
+} // namespace VideoCommon
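
ShaderCache<T> indexes shaders two ways: lookup_cache maps exact start addresses for TryGet, while invalidation_cache maps 16 KiB pages (PAGE_BITS = 14) to the entries overlapping them, so a write only touches the pages it hits. A usage sketch with a hypothetical shader type, plus the page arithmetic worked out:

    // MyShader and MyCache are illustrative stand-ins for a backend's types.
    struct MyShader {};

    class MyCache final : public VideoCommon::ShaderCache<MyShader> {
    public:
        explicit MyCache(VideoCore::RasterizerInterface& rasterizer_)
            : ShaderCache{rasterizer_} {}
    };

    // A shader registered at addr 0x10000 with size 0x5000 spans pages
    // 0x10000 >> 14 = 4 up to (0x15000 + 0x3fff) >> 14 = 6 (exclusive),
    // so a CPU write anywhere in pages 4 or 5 marks it for removal:
    //   cache.OnCPUWrite(0x14ff0, 4); // unmarks memory, queues the entry
    //   cache.SyncGuestHost();        // removes it and calls OnShaderRemoval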
diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp
new file mode 100644
index 000000000..c3c71657d
--- /dev/null
+++ b/src/video_core/shader_notify.cpp
@@ -0,0 +1,42 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/shader_notify.h"
+
+using namespace std::chrono_literals;
+
+namespace VideoCore {
+namespace {
+constexpr auto UPDATE_TICK = 32ms;
+}
+
+ShaderNotify::ShaderNotify() = default;
+ShaderNotify::~ShaderNotify() = default;
+
+std::size_t ShaderNotify::GetShadersBuilding() {
+ const auto now = std::chrono::high_resolution_clock::now();
+ const auto diff = now - last_update;
+ if (diff > UPDATE_TICK) {
+ std::shared_lock lock{mutex};
+ last_updated_count = accurate_count;
+ last_update = now;
+ }
+ return last_updated_count;
+}
+
+std::size_t ShaderNotify::GetShadersBuildingAccurate() {
+ std::shared_lock lock{mutex};
+ return accurate_count;
+}
+
+void ShaderNotify::MarkShaderComplete() {
+ std::unique_lock lock{mutex};
+ accurate_count--;
+}
+
+void ShaderNotify::MarkShaderBuilding() {
+ std::unique_lock lock{mutex};
+ accurate_count++;
+}
+
+} // namespace VideoCore
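
Taken together, ShaderNotify is a small reader-writer counter: build workers take the
mutex exclusively to adjust accurate_count, pollers read it shared, and
GetShadersBuilding refreshes its cached copy at most once per UPDATE_TICK (32 ms). A
hypothetical caller-side sketch (all names other than the ShaderNotify API are made up):

    VideoCore::ShaderNotify shader_notify;

    // Worker thread: bracket an asynchronous shader build.
    void BuildShader() {
        shader_notify.MarkShaderBuilding();
        // ... compile the shader ...
        shader_notify.MarkShaderComplete();
    }

    // UI thread: poll the throttled count for an on-screen indicator.
    void UpdateOverlay() {
        if (const std::size_t building = shader_notify.GetShadersBuilding(); building > 0) {
            // e.g. draw "Building N shaders" in the status overlay
        }
    }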
diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h
new file mode 100644
index 000000000..a9c92d179
--- /dev/null
+++ b/src/video_core/shader_notify.h
@@ -0,0 +1,29 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <chrono>
+#include <shared_mutex>
+#include "common/common_types.h"
+
+namespace VideoCore {
+class ShaderNotify {
+public:
+ ShaderNotify();
+ ~ShaderNotify();
+
+ std::size_t GetShadersBuilding();
+ std::size_t GetShadersBuildingAccurate();
+
+ void MarkShaderComplete();
+ void MarkShaderBuilding();
+
+private:
+ std::size_t last_updated_count{};
+ std::size_t accurate_count{};
+ std::shared_mutex mutex;
+ std::chrono::high_resolution_clock::time_point last_update{};
+};
+} // namespace VideoCore
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index bbe93903c..1688267bb 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -74,117 +74,131 @@ bool SurfaceTargetIsArray(SurfaceTarget target) {
PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
switch (format) {
- case Tegra::DepthFormat::S8_Z24_UNORM:
- return PixelFormat::S8Z24;
- case Tegra::DepthFormat::Z24_S8_UNORM:
- return PixelFormat::Z24S8;
- case Tegra::DepthFormat::Z32_FLOAT:
- return PixelFormat::Z32F;
- case Tegra::DepthFormat::Z16_UNORM:
- return PixelFormat::Z16;
- case Tegra::DepthFormat::Z32_S8_X24_FLOAT:
- return PixelFormat::Z32FS8;
+ case Tegra::DepthFormat::S8_UINT_Z24_UNORM:
+ return PixelFormat::S8_UINT_D24_UNORM;
+ case Tegra::DepthFormat::D24S8_UNORM:
+ return PixelFormat::D24_UNORM_S8_UINT;
+ case Tegra::DepthFormat::D32_FLOAT:
+ return PixelFormat::D32_FLOAT;
+ case Tegra::DepthFormat::D16_UNORM:
+ return PixelFormat::D16_UNORM;
+ case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT:
+ return PixelFormat::D32_FLOAT_S8_UINT;
default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
- return PixelFormat::S8Z24;
+ UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
+ return PixelFormat::S8_UINT_D24_UNORM;
}
}
PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
switch (format) {
- case Tegra::RenderTargetFormat::RGBA8_SRGB:
- return PixelFormat::RGBA8_SRGB;
- case Tegra::RenderTargetFormat::RGBA8_UNORM:
- return PixelFormat::ABGR8U;
- case Tegra::RenderTargetFormat::RGBA8_SNORM:
- return PixelFormat::ABGR8S;
- case Tegra::RenderTargetFormat::RGBA8_UINT:
- return PixelFormat::ABGR8UI;
- case Tegra::RenderTargetFormat::BGRA8_SRGB:
- return PixelFormat::BGRA8_SRGB;
- case Tegra::RenderTargetFormat::BGRA8_UNORM:
- return PixelFormat::BGRA8;
- case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
- return PixelFormat::A2B10G10R10U;
- case Tegra::RenderTargetFormat::RGBA16_FLOAT:
- return PixelFormat::RGBA16F;
- case Tegra::RenderTargetFormat::RGBA16_UNORM:
- return PixelFormat::RGBA16U;
- case Tegra::RenderTargetFormat::RGBA16_SNORM:
- return PixelFormat::RGBA16S;
- case Tegra::RenderTargetFormat::RGBA16_UINT:
- return PixelFormat::RGBA16UI;
- case Tegra::RenderTargetFormat::RGBA32_FLOAT:
- return PixelFormat::RGBA32F;
- case Tegra::RenderTargetFormat::RG32_FLOAT:
- return PixelFormat::RG32F;
- case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
- return PixelFormat::R11FG11FB10F;
- case Tegra::RenderTargetFormat::B5G6R5_UNORM:
- return PixelFormat::B5G6R5U;
- case Tegra::RenderTargetFormat::BGR5A1_UNORM:
- return PixelFormat::A1B5G5R5U;
- case Tegra::RenderTargetFormat::RGBA32_UINT:
- return PixelFormat::RGBA32UI;
- case Tegra::RenderTargetFormat::R8_UNORM:
- return PixelFormat::R8U;
- case Tegra::RenderTargetFormat::R8_UINT:
- return PixelFormat::R8UI;
- case Tegra::RenderTargetFormat::RG16_FLOAT:
- return PixelFormat::RG16F;
- case Tegra::RenderTargetFormat::RG16_UINT:
- return PixelFormat::RG16UI;
- case Tegra::RenderTargetFormat::RG16_SINT:
- return PixelFormat::RG16I;
- case Tegra::RenderTargetFormat::RG16_UNORM:
- return PixelFormat::RG16;
- case Tegra::RenderTargetFormat::RG16_SNORM:
- return PixelFormat::RG16S;
- case Tegra::RenderTargetFormat::RG8_UNORM:
- return PixelFormat::RG8U;
- case Tegra::RenderTargetFormat::RG8_SNORM:
- return PixelFormat::RG8S;
- case Tegra::RenderTargetFormat::RG8_UINT:
- return PixelFormat::RG8UI;
- case Tegra::RenderTargetFormat::R16_FLOAT:
- return PixelFormat::R16F;
+ case Tegra::RenderTargetFormat::R32B32G32A32_FLOAT:
+ return PixelFormat::R32G32B32A32_FLOAT;
+ case Tegra::RenderTargetFormat::R32G32B32A32_SINT:
+ return PixelFormat::R32G32B32A32_SINT;
+ case Tegra::RenderTargetFormat::R32G32B32A32_UINT:
+ return PixelFormat::R32G32B32A32_UINT;
+ case Tegra::RenderTargetFormat::R16G16B16A16_UNORM:
+ return PixelFormat::R16G16B16A16_UNORM;
+ case Tegra::RenderTargetFormat::R16G16B16A16_SNORM:
+ return PixelFormat::R16G16B16A16_SNORM;
+ case Tegra::RenderTargetFormat::R16G16B16A16_SINT:
+ return PixelFormat::R16G16B16A16_SINT;
+ case Tegra::RenderTargetFormat::R16G16B16A16_UINT:
+ return PixelFormat::R16G16B16A16_UINT;
+ case Tegra::RenderTargetFormat::R16G16B16A16_FLOAT:
+ return PixelFormat::R16G16B16A16_FLOAT;
+ case Tegra::RenderTargetFormat::R32G32_FLOAT:
+ return PixelFormat::R32G32_FLOAT;
+ case Tegra::RenderTargetFormat::R32G32_SINT:
+ return PixelFormat::R32G32_SINT;
+ case Tegra::RenderTargetFormat::R32G32_UINT:
+ return PixelFormat::R32G32_UINT;
+ case Tegra::RenderTargetFormat::R16G16B16X16_FLOAT:
+ return PixelFormat::R16G16B16X16_FLOAT;
+ case Tegra::RenderTargetFormat::B8G8R8A8_UNORM:
+ return PixelFormat::B8G8R8A8_UNORM;
+ case Tegra::RenderTargetFormat::B8G8R8A8_SRGB:
+ return PixelFormat::B8G8R8A8_SRGB;
+ case Tegra::RenderTargetFormat::A2B10G10R10_UNORM:
+ return PixelFormat::A2B10G10R10_UNORM;
+ case Tegra::RenderTargetFormat::A2B10G10R10_UINT:
+ return PixelFormat::A2B10G10R10_UINT;
+ case Tegra::RenderTargetFormat::A8B8G8R8_UNORM:
+ return PixelFormat::A8B8G8R8_UNORM;
+ case Tegra::RenderTargetFormat::A8B8G8R8_SRGB:
+ return PixelFormat::A8B8G8R8_SRGB;
+ case Tegra::RenderTargetFormat::A8B8G8R8_SNORM:
+ return PixelFormat::A8B8G8R8_SNORM;
+ case Tegra::RenderTargetFormat::A8B8G8R8_SINT:
+ return PixelFormat::A8B8G8R8_SINT;
+ case Tegra::RenderTargetFormat::A8B8G8R8_UINT:
+ return PixelFormat::A8B8G8R8_UINT;
+ case Tegra::RenderTargetFormat::R16G16_UNORM:
+ return PixelFormat::R16G16_UNORM;
+ case Tegra::RenderTargetFormat::R16G16_SNORM:
+ return PixelFormat::R16G16_SNORM;
+ case Tegra::RenderTargetFormat::R16G16_SINT:
+ return PixelFormat::R16G16_SINT;
+ case Tegra::RenderTargetFormat::R16G16_UINT:
+ return PixelFormat::R16G16_UINT;
+ case Tegra::RenderTargetFormat::R16G16_FLOAT:
+ return PixelFormat::R16G16_FLOAT;
+ case Tegra::RenderTargetFormat::B10G11R11_FLOAT:
+ return PixelFormat::B10G11R11_FLOAT;
+ case Tegra::RenderTargetFormat::R32_SINT:
+ return PixelFormat::R32_SINT;
+ case Tegra::RenderTargetFormat::R32_UINT:
+ return PixelFormat::R32_UINT;
+ case Tegra::RenderTargetFormat::R32_FLOAT:
+ return PixelFormat::R32_FLOAT;
+ case Tegra::RenderTargetFormat::R5G6B5_UNORM:
+ return PixelFormat::R5G6B5_UNORM;
+ case Tegra::RenderTargetFormat::A1R5G5B5_UNORM:
+ return PixelFormat::A1R5G5B5_UNORM;
+ case Tegra::RenderTargetFormat::R8G8_UNORM:
+ return PixelFormat::R8G8_UNORM;
+ case Tegra::RenderTargetFormat::R8G8_SNORM:
+ return PixelFormat::R8G8_SNORM;
+ case Tegra::RenderTargetFormat::R8G8_SINT:
+ return PixelFormat::R8G8_SINT;
+ case Tegra::RenderTargetFormat::R8G8_UINT:
+ return PixelFormat::R8G8_UINT;
case Tegra::RenderTargetFormat::R16_UNORM:
- return PixelFormat::R16U;
+ return PixelFormat::R16_UNORM;
case Tegra::RenderTargetFormat::R16_SNORM:
- return PixelFormat::R16S;
- case Tegra::RenderTargetFormat::R16_UINT:
- return PixelFormat::R16UI;
+ return PixelFormat::R16_SNORM;
case Tegra::RenderTargetFormat::R16_SINT:
- return PixelFormat::R16I;
- case Tegra::RenderTargetFormat::R32_FLOAT:
- return PixelFormat::R32F;
- case Tegra::RenderTargetFormat::R32_SINT:
- return PixelFormat::R32I;
- case Tegra::RenderTargetFormat::R32_UINT:
- return PixelFormat::R32UI;
- case Tegra::RenderTargetFormat::RG32_UINT:
- return PixelFormat::RG32UI;
- case Tegra::RenderTargetFormat::RGBX16_FLOAT:
- return PixelFormat::RGBX16F;
+ return PixelFormat::R16_SINT;
+ case Tegra::RenderTargetFormat::R16_UINT:
+ return PixelFormat::R16_UINT;
+ case Tegra::RenderTargetFormat::R16_FLOAT:
+ return PixelFormat::R16_FLOAT;
+ case Tegra::RenderTargetFormat::R8_UNORM:
+ return PixelFormat::R8_UNORM;
+ case Tegra::RenderTargetFormat::R8_SNORM:
+ return PixelFormat::R8_SNORM;
+ case Tegra::RenderTargetFormat::R8_SINT:
+ return PixelFormat::R8_SINT;
+ case Tegra::RenderTargetFormat::R8_UINT:
+ return PixelFormat::R8_UINT;
default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
- return PixelFormat::RGBA8_SRGB;
+ UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
+ return PixelFormat::A8B8G8R8_UNORM;
}
}
PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
switch (format) {
- case Tegra::FramebufferConfig::PixelFormat::ABGR8:
- return PixelFormat::ABGR8U;
- case Tegra::FramebufferConfig::PixelFormat::RGB565:
- return PixelFormat::B5G6R5U;
- case Tegra::FramebufferConfig::PixelFormat::BGRA8:
- return PixelFormat::BGRA8;
+ case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
+ return PixelFormat::A8B8G8R8_UNORM;
+ case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
+ return PixelFormat::R5G6B5_UNORM;
+ case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM:
+ return PixelFormat::B8G8R8A8_UNORM;
default:
UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
- return PixelFormat::ABGR8U;
+ return PixelFormat::A8B8G8R8_UNORM;
}
}
@@ -212,27 +226,27 @@ SurfaceType GetFormatType(PixelFormat pixel_format) {
bool IsPixelFormatASTC(PixelFormat format) {
switch (format) {
- case PixelFormat::ASTC_2D_4X4:
- case PixelFormat::ASTC_2D_5X4:
- case PixelFormat::ASTC_2D_5X5:
- case PixelFormat::ASTC_2D_8X8:
- case PixelFormat::ASTC_2D_8X5:
+ case PixelFormat::ASTC_2D_4X4_UNORM:
+ case PixelFormat::ASTC_2D_5X4_UNORM:
+ case PixelFormat::ASTC_2D_5X5_UNORM:
+ case PixelFormat::ASTC_2D_8X8_UNORM:
+ case PixelFormat::ASTC_2D_8X5_UNORM:
case PixelFormat::ASTC_2D_4X4_SRGB:
case PixelFormat::ASTC_2D_5X4_SRGB:
case PixelFormat::ASTC_2D_5X5_SRGB:
case PixelFormat::ASTC_2D_8X8_SRGB:
case PixelFormat::ASTC_2D_8X5_SRGB:
- case PixelFormat::ASTC_2D_10X8:
+ case PixelFormat::ASTC_2D_10X8_UNORM:
case PixelFormat::ASTC_2D_10X8_SRGB:
- case PixelFormat::ASTC_2D_6X6:
+ case PixelFormat::ASTC_2D_6X6_UNORM:
case PixelFormat::ASTC_2D_6X6_SRGB:
- case PixelFormat::ASTC_2D_10X10:
+ case PixelFormat::ASTC_2D_10X10_UNORM:
case PixelFormat::ASTC_2D_10X10_SRGB:
- case PixelFormat::ASTC_2D_12X12:
+ case PixelFormat::ASTC_2D_12X12_UNORM:
case PixelFormat::ASTC_2D_12X12_SRGB:
- case PixelFormat::ASTC_2D_8X6:
+ case PixelFormat::ASTC_2D_8X6_UNORM:
case PixelFormat::ASTC_2D_8X6_SRGB:
- case PixelFormat::ASTC_2D_6X5:
+ case PixelFormat::ASTC_2D_6X5_UNORM:
case PixelFormat::ASTC_2D_6X5_SRGB:
return true;
default:
@@ -242,12 +256,12 @@ bool IsPixelFormatASTC(PixelFormat format) {
bool IsPixelFormatSRGB(PixelFormat format) {
switch (format) {
- case PixelFormat::RGBA8_SRGB:
- case PixelFormat::BGRA8_SRGB:
- case PixelFormat::DXT1_SRGB:
- case PixelFormat::DXT23_SRGB:
- case PixelFormat::DXT45_SRGB:
- case PixelFormat::BC7U_SRGB:
+ case PixelFormat::A8B8G8R8_SRGB:
+ case PixelFormat::B8G8R8A8_SRGB:
+ case PixelFormat::BC1_RGBA_SRGB:
+ case PixelFormat::BC2_SRGB:
+ case PixelFormat::BC3_SRGB:
+ case PixelFormat::BC7_SRGB:
case PixelFormat::ASTC_2D_4X4_SRGB:
case PixelFormat::ASTC_2D_8X8_SRGB:
case PixelFormat::ASTC_2D_8X5_SRGB:
@@ -269,25 +283,4 @@ std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)};
}
-bool IsFormatBCn(PixelFormat format) {
- switch (format) {
- case PixelFormat::DXT1:
- case PixelFormat::DXT23:
- case PixelFormat::DXT45:
- case PixelFormat::DXN1:
- case PixelFormat::DXN2SNORM:
- case PixelFormat::DXN2UNORM:
- case PixelFormat::BC7U:
- case PixelFormat::BC6H_UF16:
- case PixelFormat::BC6H_SF16:
- case PixelFormat::DXT1_SRGB:
- case PixelFormat::DXT23_SRGB:
- case PixelFormat::DXT45_SRGB:
- case PixelFormat::BC7U_SRGB:
- return true;
- default:
- return false;
- }
-}
-
} // namespace VideoCore::Surface
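
One property all the renamed ASTC formats share, and which the tables in surface.h
encode, is that an ASTC block is always 128 bits regardless of its footprint. A short
sketch of how GetASTCBlockSize from this file could be used to size a mip level
(ASTCLevelSize is a hypothetical helper, not part of the source):

    // Round the texel extent up to whole blocks, then multiply by the fixed
    // 16-byte (128-bit) ASTC block size.
    std::size_t ASTCLevelSize(VideoCore::Surface::PixelFormat format, u32 width, u32 height) {
        const auto [block_w, block_h] = VideoCore::Surface::GetASTCBlockSize(format);
        const u32 blocks_x = (width + block_w - 1) / block_w;
        const u32 blocks_y = (height + block_h - 1) / block_h;
        return std::size_t{blocks_x} * blocks_y * 16;
    }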
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 6da6a1b97..cfd12fa61 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -15,94 +15,105 @@
namespace VideoCore::Surface {
enum class PixelFormat {
- ABGR8U = 0,
- ABGR8S = 1,
- ABGR8UI = 2,
- B5G6R5U = 3,
- A2B10G10R10U = 4,
- A1B5G5R5U = 5,
- R8U = 6,
- R8UI = 7,
- RGBA16F = 8,
- RGBA16U = 9,
- RGBA16S = 10,
- RGBA16UI = 11,
- R11FG11FB10F = 12,
- RGBA32UI = 13,
- DXT1 = 14,
- DXT23 = 15,
- DXT45 = 16,
- DXN1 = 17, // This is also known as BC4
- DXN2UNORM = 18,
- DXN2SNORM = 19,
- BC7U = 20,
- BC6H_UF16 = 21,
- BC6H_SF16 = 22,
- ASTC_2D_4X4 = 23,
- BGRA8 = 24,
- RGBA32F = 25,
- RG32F = 26,
- R32F = 27,
- R16F = 28,
- R16U = 29,
- R16S = 30,
- R16UI = 31,
- R16I = 32,
- RG16 = 33,
- RG16F = 34,
- RG16UI = 35,
- RG16I = 36,
- RG16S = 37,
- RGB32F = 38,
- RGBA8_SRGB = 39,
- RG8U = 40,
- RG8S = 41,
- RG8UI = 42,
- RG32UI = 43,
- RGBX16F = 44,
- R32UI = 45,
- R32I = 46,
- ASTC_2D_8X8 = 47,
- ASTC_2D_8X5 = 48,
- ASTC_2D_5X4 = 49,
- BGRA8_SRGB = 50,
- DXT1_SRGB = 51,
- DXT23_SRGB = 52,
- DXT45_SRGB = 53,
- BC7U_SRGB = 54,
- R4G4B4A4U = 55,
- ASTC_2D_4X4_SRGB = 56,
- ASTC_2D_8X8_SRGB = 57,
- ASTC_2D_8X5_SRGB = 58,
- ASTC_2D_5X4_SRGB = 59,
- ASTC_2D_5X5 = 60,
- ASTC_2D_5X5_SRGB = 61,
- ASTC_2D_10X8 = 62,
- ASTC_2D_10X8_SRGB = 63,
- ASTC_2D_6X6 = 64,
- ASTC_2D_6X6_SRGB = 65,
- ASTC_2D_10X10 = 66,
- ASTC_2D_10X10_SRGB = 67,
- ASTC_2D_12X12 = 68,
- ASTC_2D_12X12_SRGB = 69,
- ASTC_2D_8X6 = 70,
- ASTC_2D_8X6_SRGB = 71,
- ASTC_2D_6X5 = 72,
- ASTC_2D_6X5_SRGB = 73,
- E5B9G9R9F = 74,
+ A8B8G8R8_UNORM,
+ A8B8G8R8_SNORM,
+ A8B8G8R8_SINT,
+ A8B8G8R8_UINT,
+ R5G6B5_UNORM,
+ B5G6R5_UNORM,
+ A1R5G5B5_UNORM,
+ A2B10G10R10_UNORM,
+ A2B10G10R10_UINT,
+ A1B5G5R5_UNORM,
+ R8_UNORM,
+ R8_SNORM,
+ R8_SINT,
+ R8_UINT,
+ R16G16B16A16_FLOAT,
+ R16G16B16A16_UNORM,
+ R16G16B16A16_SNORM,
+ R16G16B16A16_SINT,
+ R16G16B16A16_UINT,
+ B10G11R11_FLOAT,
+ R32G32B32A32_UINT,
+ BC1_RGBA_UNORM,
+ BC2_UNORM,
+ BC3_UNORM,
+ BC4_UNORM,
+ BC4_SNORM,
+ BC5_UNORM,
+ BC5_SNORM,
+ BC7_UNORM,
+ BC6H_UFLOAT,
+ BC6H_SFLOAT,
+ ASTC_2D_4X4_UNORM,
+ B8G8R8A8_UNORM,
+ R32G32B32A32_FLOAT,
+ R32G32B32A32_SINT,
+ R32G32_FLOAT,
+ R32G32_SINT,
+ R32_FLOAT,
+ R16_FLOAT,
+ R16_UNORM,
+ R16_SNORM,
+ R16_UINT,
+ R16_SINT,
+ R16G16_UNORM,
+ R16G16_FLOAT,
+ R16G16_UINT,
+ R16G16_SINT,
+ R16G16_SNORM,
+ R32G32B32_FLOAT,
+ A8B8G8R8_SRGB,
+ R8G8_UNORM,
+ R8G8_SNORM,
+ R8G8_SINT,
+ R8G8_UINT,
+ R32G32_UINT,
+ R16G16B16X16_FLOAT,
+ R32_UINT,
+ R32_SINT,
+ ASTC_2D_8X8_UNORM,
+ ASTC_2D_8X5_UNORM,
+ ASTC_2D_5X4_UNORM,
+ B8G8R8A8_SRGB,
+ BC1_RGBA_SRGB,
+ BC2_SRGB,
+ BC3_SRGB,
+ BC7_SRGB,
+ A4B4G4R4_UNORM,
+ ASTC_2D_4X4_SRGB,
+ ASTC_2D_8X8_SRGB,
+ ASTC_2D_8X5_SRGB,
+ ASTC_2D_5X4_SRGB,
+ ASTC_2D_5X5_UNORM,
+ ASTC_2D_5X5_SRGB,
+ ASTC_2D_10X8_UNORM,
+ ASTC_2D_10X8_SRGB,
+ ASTC_2D_6X6_UNORM,
+ ASTC_2D_6X6_SRGB,
+ ASTC_2D_10X10_UNORM,
+ ASTC_2D_10X10_SRGB,
+ ASTC_2D_12X12_UNORM,
+ ASTC_2D_12X12_SRGB,
+ ASTC_2D_8X6_UNORM,
+ ASTC_2D_8X6_SRGB,
+ ASTC_2D_6X5_UNORM,
+ ASTC_2D_6X5_SRGB,
+ E5B9G9R9_FLOAT,
MaxColorFormat,
// Depth formats
- Z32F = 75,
- Z16 = 76,
+ D32_FLOAT = MaxColorFormat,
+ D16_UNORM,
MaxDepthFormat,
// DepthStencil formats
- Z24S8 = 77,
- S8Z24 = 78,
- Z32FS8 = 79,
+ D24_UNORM_S8_UINT = MaxDepthFormat,
+ S8_UINT_D24_UNORM,
+ D32_FLOAT_S8_UINT,
MaxDepthStencilFormat,
@@ -130,86 +141,97 @@ enum class SurfaceTarget {
};
constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
- 0, // ABGR8U
- 0, // ABGR8S
- 0, // ABGR8UI
- 0, // B5G6R5U
- 0, // A2B10G10R10U
- 0, // A1B5G5R5U
- 0, // R8U
- 0, // R8UI
- 0, // RGBA16F
- 0, // RGBA16U
- 0, // RGBA16S
- 0, // RGBA16UI
- 0, // R11FG11FB10F
- 0, // RGBA32UI
- 2, // DXT1
- 2, // DXT23
- 2, // DXT45
- 2, // DXN1
- 2, // DXN2UNORM
- 2, // DXN2SNORM
- 2, // BC7U
- 2, // BC6H_UF16
- 2, // BC6H_SF16
- 2, // ASTC_2D_4X4
- 0, // BGRA8
- 0, // RGBA32F
- 0, // RG32F
- 0, // R32F
- 0, // R16F
- 0, // R16U
- 0, // R16S
- 0, // R16UI
- 0, // R16I
- 0, // RG16
- 0, // RG16F
- 0, // RG16UI
- 0, // RG16I
- 0, // RG16S
- 0, // RGB32F
- 0, // RGBA8_SRGB
- 0, // RG8U
- 0, // RG8S
- 0, // RG8UI
- 0, // RG32UI
- 0, // RGBX16F
- 0, // R32UI
- 0, // R32I
- 2, // ASTC_2D_8X8
- 2, // ASTC_2D_8X5
- 2, // ASTC_2D_5X4
- 0, // BGRA8_SRGB
- 2, // DXT1_SRGB
- 2, // DXT23_SRGB
- 2, // DXT45_SRGB
- 2, // BC7U_SRGB
- 0, // R4G4B4A4U
+ 0, // A8B8G8R8_UNORM
+ 0, // A8B8G8R8_SNORM
+ 0, // A8B8G8R8_SINT
+ 0, // A8B8G8R8_UINT
+ 0, // R5G6B5_UNORM
+ 0, // B5G6R5_UNORM
+ 0, // A1R5G5B5_UNORM
+ 0, // A2B10G10R10_UNORM
+ 0, // A2B10G10R10_UINT
+ 0, // A1B5G5R5_UNORM
+ 0, // R8_UNORM
+ 0, // R8_SNORM
+ 0, // R8_SINT
+ 0, // R8_UINT
+ 0, // R16G16B16A16_FLOAT
+ 0, // R16G16B16A16_UNORM
+ 0, // R16G16B16A16_SNORM
+ 0, // R16G16B16A16_SINT
+ 0, // R16G16B16A16_UINT
+ 0, // B10G11R11_FLOAT
+ 0, // R32G32B32A32_UINT
+ 2, // BC1_RGBA_UNORM
+ 2, // BC2_UNORM
+ 2, // BC3_UNORM
+ 2, // BC4_UNORM
+ 2, // BC4_SNORM
+ 2, // BC5_UNORM
+ 2, // BC5_SNORM
+ 2, // BC7_UNORM
+ 2, // BC6H_UFLOAT
+ 2, // BC6H_SFLOAT
+ 2, // ASTC_2D_4X4_UNORM
+ 0, // B8G8R8A8_UNORM
+ 0, // R32G32B32A32_FLOAT
+ 0, // R32G32B32A32_SINT
+ 0, // R32G32_FLOAT
+ 0, // R32G32_SINT
+ 0, // R32_FLOAT
+ 0, // R16_FLOAT
+ 0, // R16_UNORM
+ 0, // R16_SNORM
+ 0, // R16_UINT
+ 0, // R16_SINT
+ 0, // R16G16_UNORM
+ 0, // R16G16_FLOAT
+ 0, // R16G16_UINT
+ 0, // R16G16_SINT
+ 0, // R16G16_SNORM
+ 0, // R32G32B32_FLOAT
+ 0, // A8B8G8R8_SRGB
+ 0, // R8G8_UNORM
+ 0, // R8G8_SNORM
+ 0, // R8G8_SINT
+ 0, // R8G8_UINT
+ 0, // R32G32_UINT
+ 0, // R16G16B16X16_FLOAT
+ 0, // R32_UINT
+ 0, // R32_SINT
+ 2, // ASTC_2D_8X8_UNORM
+ 2, // ASTC_2D_8X5_UNORM
+ 2, // ASTC_2D_5X4_UNORM
+ 0, // B8G8R8A8_SRGB
+ 2, // BC1_RGBA_SRGB
+ 2, // BC2_SRGB
+ 2, // BC3_SRGB
+ 2, // BC7_SRGB
+ 0, // A4B4G4R4_UNORM
2, // ASTC_2D_4X4_SRGB
2, // ASTC_2D_8X8_SRGB
2, // ASTC_2D_8X5_SRGB
2, // ASTC_2D_5X4_SRGB
- 2, // ASTC_2D_5X5
+ 2, // ASTC_2D_5X5_UNORM
2, // ASTC_2D_5X5_SRGB
- 2, // ASTC_2D_10X8
+ 2, // ASTC_2D_10X8_UNORM
2, // ASTC_2D_10X8_SRGB
- 2, // ASTC_2D_6X6
+ 2, // ASTC_2D_6X6_UNORM
2, // ASTC_2D_6X6_SRGB
- 2, // ASTC_2D_10X10
+ 2, // ASTC_2D_10X10_UNORM
2, // ASTC_2D_10X10_SRGB
- 2, // ASTC_2D_12X12
+ 2, // ASTC_2D_12X12_UNORM
2, // ASTC_2D_12X12_SRGB
- 2, // ASTC_2D_8X6
+ 2, // ASTC_2D_8X6_UNORM
2, // ASTC_2D_8X6_SRGB
- 2, // ASTC_2D_6X5
+ 2, // ASTC_2D_6X5_UNORM
2, // ASTC_2D_6X5_SRGB
- 0, // E5B9G9R9F
- 0, // Z32F
- 0, // Z16
- 0, // Z24S8
- 0, // S8Z24
- 0, // Z32FS8
+ 0, // E5B9G9R9_FLOAT
+ 0, // D32_FLOAT
+ 0, // D16_UNORM
+ 0, // D24_UNORM_S8_UINT
+ 0, // S8_UINT_D24_UNORM
+ 0, // D32_FLOAT_S8_UINT
}};
/**
@@ -229,86 +251,97 @@ inline constexpr u32 GetCompressionFactor(PixelFormat format) {
}
constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
- 1, // ABGR8U
- 1, // ABGR8S
- 1, // ABGR8UI
- 1, // B5G6R5U
- 1, // A2B10G10R10U
- 1, // A1B5G5R5U
- 1, // R8U
- 1, // R8UI
- 1, // RGBA16F
- 1, // RGBA16U
- 1, // RGBA16S
- 1, // RGBA16UI
- 1, // R11FG11FB10F
- 1, // RGBA32UI
- 4, // DXT1
- 4, // DXT23
- 4, // DXT45
- 4, // DXN1
- 4, // DXN2UNORM
- 4, // DXN2SNORM
- 4, // BC7U
- 4, // BC6H_UF16
- 4, // BC6H_SF16
- 4, // ASTC_2D_4X4
- 1, // BGRA8
- 1, // RGBA32F
- 1, // RG32F
- 1, // R32F
- 1, // R16F
- 1, // R16U
- 1, // R16S
- 1, // R16UI
- 1, // R16I
- 1, // RG16
- 1, // RG16F
- 1, // RG16UI
- 1, // RG16I
- 1, // RG16S
- 1, // RGB32F
- 1, // RGBA8_SRGB
- 1, // RG8U
- 1, // RG8S
- 1, // RG8UI
- 1, // RG32UI
- 1, // RGBX16F
- 1, // R32UI
- 1, // R32I
- 8, // ASTC_2D_8X8
- 8, // ASTC_2D_8X5
- 5, // ASTC_2D_5X4
- 1, // BGRA8_SRGB
- 4, // DXT1_SRGB
- 4, // DXT23_SRGB
- 4, // DXT45_SRGB
- 4, // BC7U_SRGB
- 1, // R4G4B4A4U
+ 1, // A8B8G8R8_UNORM
+ 1, // A8B8G8R8_SNORM
+ 1, // A8B8G8R8_SINT
+ 1, // A8B8G8R8_UINT
+ 1, // R5G6B5_UNORM
+ 1, // B5G6R5_UNORM
+ 1, // A1R5G5B5_UNORM
+ 1, // A2B10G10R10_UNORM
+ 1, // A2B10G10R10_UINT
+ 1, // A1B5G5R5_UNORM
+ 1, // R8_UNORM
+ 1, // R8_SNORM
+ 1, // R8_SINT
+ 1, // R8_UINT
+ 1, // R16G16B16A16_FLOAT
+ 1, // R16G16B16A16_UNORM
+ 1, // R16G16B16A16_SNORM
+ 1, // R16G16B16A16_SINT
+ 1, // R16G16B16A16_UINT
+ 1, // B10G11R11_FLOAT
+ 1, // R32G32B32A32_UINT
+ 4, // BC1_RGBA_UNORM
+ 4, // BC2_UNORM
+ 4, // BC3_UNORM
+ 4, // BC4_UNORM
+ 4, // BC4_SNORM
+ 4, // BC5_UNORM
+ 4, // BC5_SNORM
+ 4, // BC7_UNORM
+ 4, // BC6H_UFLOAT
+ 4, // BC6H_SFLOAT
+ 4, // ASTC_2D_4X4_UNORM
+ 1, // B8G8R8A8_UNORM
+ 1, // R32G32B32A32_FLOAT
+ 1, // R32G32B32A32_SINT
+ 1, // R32G32_FLOAT
+ 1, // R32G32_SINT
+ 1, // R32_FLOAT
+ 1, // R16_FLOAT
+ 1, // R16_UNORM
+ 1, // R16_SNORM
+ 1, // R16_UINT
+ 1, // R16_SINT
+ 1, // R16G16_UNORM
+ 1, // R16G16_FLOAT
+ 1, // R16G16_UINT
+ 1, // R16G16_SINT
+ 1, // R16G16_SNORM
+ 1, // R32G32B32_FLOAT
+ 1, // A8B8G8R8_SRGB
+ 1, // R8G8_UNORM
+ 1, // R8G8_SNORM
+ 1, // R8G8_SINT
+ 1, // R8G8_UINT
+ 1, // R32G32_UINT
+ 1, // R16G16B16X16_FLOAT
+ 1, // R32_UINT
+ 1, // R32_SINT
+ 8, // ASTC_2D_8X8_UNORM
+ 8, // ASTC_2D_8X5_UNORM
+ 5, // ASTC_2D_5X4_UNORM
+ 1, // B8G8R8A8_SRGB
+ 4, // BC1_RGBA_SRGB
+ 4, // BC2_SRGB
+ 4, // BC3_SRGB
+ 4, // BC7_SRGB
+ 1, // A4B4G4R4_UNORM
4, // ASTC_2D_4X4_SRGB
8, // ASTC_2D_8X8_SRGB
8, // ASTC_2D_8X5_SRGB
5, // ASTC_2D_5X4_SRGB
- 5, // ASTC_2D_5X5
+ 5, // ASTC_2D_5X5_UNORM
5, // ASTC_2D_5X5_SRGB
- 10, // ASTC_2D_10X8
+ 10, // ASTC_2D_10X8_UNORM
10, // ASTC_2D_10X8_SRGB
- 6, // ASTC_2D_6X6
+ 6, // ASTC_2D_6X6_UNORM
6, // ASTC_2D_6X6_SRGB
- 10, // ASTC_2D_10X10
+ 10, // ASTC_2D_10X10_UNORM
10, // ASTC_2D_10X10_SRGB
- 12, // ASTC_2D_12X12
+ 12, // ASTC_2D_12X12_UNORM
12, // ASTC_2D_12X12_SRGB
- 8, // ASTC_2D_8X6
+ 8, // ASTC_2D_8X6_UNORM
8, // ASTC_2D_8X6_SRGB
- 6, // ASTC_2D_6X5
+ 6, // ASTC_2D_6X5_UNORM
6, // ASTC_2D_6X5_SRGB
- 1, // E5B9G9R9F
- 1, // Z32F
- 1, // Z16
- 1, // Z24S8
- 1, // S8Z24
- 1, // Z32FS8
+ 1, // E5B9G9R9_FLOAT
+ 1, // D32_FLOAT
+ 1, // D16_UNORM
+ 1, // D24_UNORM_S8_UINT
+ 1, // S8_UINT_D24_UNORM
+ 1, // D32_FLOAT_S8_UINT
}};
static constexpr u32 GetDefaultBlockWidth(PixelFormat format) {
@@ -320,86 +353,97 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) {
}
constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
- 1, // ABGR8U
- 1, // ABGR8S
- 1, // ABGR8UI
- 1, // B5G6R5U
- 1, // A2B10G10R10U
- 1, // A1B5G5R5U
- 1, // R8U
- 1, // R8UI
- 1, // RGBA16F
- 1, // RGBA16U
- 1, // RGBA16S
- 1, // RGBA16UI
- 1, // R11FG11FB10F
- 1, // RGBA32UI
- 4, // DXT1
- 4, // DXT23
- 4, // DXT45
- 4, // DXN1
- 4, // DXN2UNORM
- 4, // DXN2SNORM
- 4, // BC7U
- 4, // BC6H_UF16
- 4, // BC6H_SF16
- 4, // ASTC_2D_4X4
- 1, // BGRA8
- 1, // RGBA32F
- 1, // RG32F
- 1, // R32F
- 1, // R16F
- 1, // R16U
- 1, // R16S
- 1, // R16UI
- 1, // R16I
- 1, // RG16
- 1, // RG16F
- 1, // RG16UI
- 1, // RG16I
- 1, // RG16S
- 1, // RGB32F
- 1, // RGBA8_SRGB
- 1, // RG8U
- 1, // RG8S
- 1, // RG8UI
- 1, // RG32UI
- 1, // RGBX16F
- 1, // R32UI
- 1, // R32I
- 8, // ASTC_2D_8X8
- 5, // ASTC_2D_8X5
- 4, // ASTC_2D_5X4
- 1, // BGRA8_SRGB
- 4, // DXT1_SRGB
- 4, // DXT23_SRGB
- 4, // DXT45_SRGB
- 4, // BC7U_SRGB
- 1, // R4G4B4A4U
+ 1, // A8B8G8R8_UNORM
+ 1, // A8B8G8R8_SNORM
+ 1, // A8B8G8R8_SINT
+ 1, // A8B8G8R8_UINT
+ 1, // R5G6B5_UNORM
+ 1, // B5G6R5_UNORM
+ 1, // A1R5G5B5_UNORM
+ 1, // A2B10G10R10_UNORM
+ 1, // A2B10G10R10_UINT
+ 1, // A1B5G5R5_UNORM
+ 1, // R8_UNORM
+ 1, // R8_SNORM
+ 1, // R8_SINT
+ 1, // R8_UINT
+ 1, // R16G16B16A16_FLOAT
+ 1, // R16G16B16A16_UNORM
+ 1, // R16G16B16A16_SNORM
+ 1, // R16G16B16A16_SINT
+ 1, // R16G16B16A16_UINT
+ 1, // B10G11R11_FLOAT
+ 1, // R32G32B32A32_UINT
+ 4, // BC1_RGBA_UNORM
+ 4, // BC2_UNORM
+ 4, // BC3_UNORM
+ 4, // BC4_UNORM
+ 4, // BC4_SNORM
+ 4, // BC5_UNORM
+ 4, // BC5_SNORM
+ 4, // BC7_UNORM
+ 4, // BC6H_UFLOAT
+ 4, // BC6H_SFLOAT
+ 4, // ASTC_2D_4X4_UNORM
+ 1, // B8G8R8A8_UNORM
+ 1, // R32G32B32A32_FLOAT
+ 1, // R32G32B32A32_SINT
+ 1, // R32G32_FLOAT
+ 1, // R32G32_SINT
+ 1, // R32_FLOAT
+ 1, // R16_FLOAT
+ 1, // R16_UNORM
+ 1, // R16_SNORM
+ 1, // R16_UINT
+ 1, // R16_SINT
+ 1, // R16G16_UNORM
+ 1, // R16G16_FLOAT
+ 1, // R16G16_UINT
+ 1, // R16G16_SINT
+ 1, // R16G16_SNORM
+ 1, // R32G32B32_FLOAT
+ 1, // A8B8G8R8_SRGB
+ 1, // R8G8_UNORM
+ 1, // R8G8_SNORM
+ 1, // R8G8_SINT
+ 1, // R8G8_UINT
+ 1, // R32G32_UINT
+ 1, // R16G16B16X16_FLOAT
+ 1, // R32_UINT
+ 1, // R32_SINT
+ 8, // ASTC_2D_8X8_UNORM
+ 5, // ASTC_2D_8X5_UNORM
+ 4, // ASTC_2D_5X4_UNORM
+ 1, // B8G8R8A8_SRGB
+ 4, // BC1_RGBA_SRGB
+ 4, // BC2_SRGB
+ 4, // BC3_SRGB
+ 4, // BC7_SRGB
+ 1, // A4B4G4R4_UNORM
4, // ASTC_2D_4X4_SRGB
8, // ASTC_2D_8X8_SRGB
5, // ASTC_2D_8X5_SRGB
4, // ASTC_2D_5X4_SRGB
- 5, // ASTC_2D_5X5
+ 5, // ASTC_2D_5X5_UNORM
5, // ASTC_2D_5X5_SRGB
- 8, // ASTC_2D_10X8
+ 8, // ASTC_2D_10X8_UNORM
8, // ASTC_2D_10X8_SRGB
- 6, // ASTC_2D_6X6
+ 6, // ASTC_2D_6X6_UNORM
6, // ASTC_2D_6X6_SRGB
- 10, // ASTC_2D_10X10
+ 10, // ASTC_2D_10X10_UNORM
10, // ASTC_2D_10X10_SRGB
- 12, // ASTC_2D_12X12
+ 12, // ASTC_2D_12X12_UNORM
12, // ASTC_2D_12X12_SRGB
- 6, // ASTC_2D_8X6
+ 6, // ASTC_2D_8X6_UNORM
6, // ASTC_2D_8X6_SRGB
- 5, // ASTC_2D_6X5
+ 5, // ASTC_2D_6X5_UNORM
5, // ASTC_2D_6X5_SRGB
- 1, // E5B9G9R9F
- 1, // Z32F
- 1, // Z16
- 1, // Z24S8
- 1, // S8Z24
- 1, // Z32FS8
+ 1, // E5B9G9R9_FLOAT
+ 1, // D32_FLOAT
+ 1, // D16_UNORM
+ 1, // D24_UNORM_S8_UINT
+ 1, // S8_UINT_D24_UNORM
+ 1, // D32_FLOAT_S8_UINT
}};
static constexpr u32 GetDefaultBlockHeight(PixelFormat format) {
@@ -411,86 +455,97 @@ static constexpr u32 GetDefaultBlockHeight(PixelFormat format) {
}
constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
- 32, // ABGR8U
- 32, // ABGR8S
- 32, // ABGR8UI
- 16, // B5G6R5U
- 32, // A2B10G10R10U
- 16, // A1B5G5R5U
- 8, // R8U
- 8, // R8UI
- 64, // RGBA16F
- 64, // RGBA16U
- 64, // RGBA16S
- 64, // RGBA16UI
- 32, // R11FG11FB10F
- 128, // RGBA32UI
- 64, // DXT1
- 128, // DXT23
- 128, // DXT45
- 64, // DXN1
- 128, // DXN2UNORM
- 128, // DXN2SNORM
- 128, // BC7U
- 128, // BC6H_UF16
- 128, // BC6H_SF16
- 128, // ASTC_2D_4X4
- 32, // BGRA8
- 128, // RGBA32F
- 64, // RG32F
- 32, // R32F
- 16, // R16F
- 16, // R16U
- 16, // R16S
- 16, // R16UI
- 16, // R16I
- 32, // RG16
- 32, // RG16F
- 32, // RG16UI
- 32, // RG16I
- 32, // RG16S
- 96, // RGB32F
- 32, // RGBA8_SRGB
- 16, // RG8U
- 16, // RG8S
- 16, // RG8UI
- 64, // RG32UI
- 64, // RGBX16F
- 32, // R32UI
- 32, // R32I
- 128, // ASTC_2D_8X8
- 128, // ASTC_2D_8X5
- 128, // ASTC_2D_5X4
- 32, // BGRA8_SRGB
- 64, // DXT1_SRGB
- 128, // DXT23_SRGB
- 128, // DXT45_SRGB
- 128, // BC7U
- 16, // R4G4B4A4U
+ 32, // A8B8G8R8_UNORM
+ 32, // A8B8G8R8_SNORM
+ 32, // A8B8G8R8_SINT
+ 32, // A8B8G8R8_UINT
+ 16, // R5G6B5_UNORM
+ 16, // B5G6R5_UNORM
+ 16, // A1R5G5B5_UNORM
+ 32, // A2B10G10R10_UNORM
+ 32, // A2B10G10R10_UINT
+ 16, // A1B5G5R5_UNORM
+ 8, // R8_UNORM
+ 8, // R8_SNORM
+ 8, // R8_SINT
+ 8, // R8_UINT
+ 64, // R16G16B16A16_FLOAT
+ 64, // R16G16B16A16_UNORM
+ 64, // R16G16B16A16_SNORM
+ 64, // R16G16B16A16_SINT
+ 64, // R16G16B16A16_UINT
+ 32, // B10G11R11_FLOAT
+ 128, // R32G32B32A32_UINT
+ 64, // BC1_RGBA_UNORM
+ 128, // BC2_UNORM
+ 128, // BC3_UNORM
+ 64, // BC4_UNORM
+ 64, // BC4_SNORM
+ 128, // BC5_UNORM
+ 128, // BC5_SNORM
+ 128, // BC7_UNORM
+ 128, // BC6H_UFLOAT
+ 128, // BC6H_SFLOAT
+ 128, // ASTC_2D_4X4_UNORM
+ 32, // B8G8R8A8_UNORM
+ 128, // R32G32B32A32_FLOAT
+ 128, // R32G32B32A32_SINT
+ 64, // R32G32_FLOAT
+ 64, // R32G32_SINT
+ 32, // R32_FLOAT
+ 16, // R16_FLOAT
+ 16, // R16_UNORM
+ 16, // R16_SNORM
+ 16, // R16_UINT
+ 16, // R16_SINT
+ 32, // R16G16_UNORM
+ 32, // R16G16_FLOAT
+ 32, // R16G16_UINT
+ 32, // R16G16_SINT
+ 32, // R16G16_SNORM
+ 96, // R32G32B32_FLOAT
+ 32, // A8B8G8R8_SRGB
+ 16, // R8G8_UNORM
+ 16, // R8G8_SNORM
+ 16, // R8G8_SINT
+ 16, // R8G8_UINT
+ 64, // R32G32_UINT
+ 64, // R16G16B16X16_FLOAT
+ 32, // R32_UINT
+ 32, // R32_SINT
+ 128, // ASTC_2D_8X8_UNORM
+ 128, // ASTC_2D_8X5_UNORM
+ 128, // ASTC_2D_5X4_UNORM
+ 32, // B8G8R8A8_SRGB
+ 64, // BC1_RGBA_SRGB
+ 128, // BC2_SRGB
+ 128, // BC3_SRGB
+ 128, // BC7_SRGB
+ 16, // A4B4G4R4_UNORM
128, // ASTC_2D_4X4_SRGB
128, // ASTC_2D_8X8_SRGB
128, // ASTC_2D_8X5_SRGB
128, // ASTC_2D_5X4_SRGB
- 128, // ASTC_2D_5X5
+ 128, // ASTC_2D_5X5_UNORM
128, // ASTC_2D_5X5_SRGB
- 128, // ASTC_2D_10X8
+ 128, // ASTC_2D_10X8_UNORM
128, // ASTC_2D_10X8_SRGB
- 128, // ASTC_2D_6X6
+ 128, // ASTC_2D_6X6_UNORM
128, // ASTC_2D_6X6_SRGB
- 128, // ASTC_2D_10X10
+ 128, // ASTC_2D_10X10_UNORM
128, // ASTC_2D_10X10_SRGB
- 128, // ASTC_2D_12X12
+ 128, // ASTC_2D_12X12_UNORM
128, // ASTC_2D_12X12_SRGB
- 128, // ASTC_2D_8X6
+ 128, // ASTC_2D_8X6_UNORM
128, // ASTC_2D_8X6_SRGB
- 128, // ASTC_2D_6X5
+ 128, // ASTC_2D_6X5_UNORM
128, // ASTC_2D_6X5_SRGB
- 32, // E5B9G9R9F
- 32, // Z32F
- 16, // Z16
- 32, // Z24S8
- 32, // S8Z24
- 64, // Z32FS8
+ 32, // E5B9G9R9_FLOAT
+ 32, // D32_FLOAT
+ 16, // D16_UNORM
+ 32, // D24_UNORM_S8_UINT
+ 32, // S8_UINT_D24_UNORM
+ 64, // D32_FLOAT_S8_UINT
}};
static constexpr u32 GetFormatBpp(PixelFormat format) {
@@ -529,7 +584,4 @@ bool IsPixelFormatSRGB(PixelFormat format);
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
-/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN
-bool IsFormatBCn(PixelFormat format);
-
} // namespace VideoCore::Surface
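
The sentinel enumerators (MaxColorFormat, MaxDepthFormat, MaxDepthStencilFormat)
partition the renumbered enum so that a format's surface type falls out of a few
comparisons. A sketch of the idea; ColorTexture appears elsewhere in this patch, while
the Depth, DepthStencil and Invalid enumerator names are assumptions about SurfaceType,
and the real logic lives in GetFormatType:

    using VideoCore::Surface::PixelFormat;
    using VideoCore::Surface::SurfaceType;

    // Classify by range: color formats come first, then depth, then depth-stencil.
    SurfaceType Classify(PixelFormat format) {
        if (format < PixelFormat::MaxColorFormat) {
            return SurfaceType::ColorTexture;
        }
        if (format < PixelFormat::MaxDepthFormat) {
            return SurfaceType::Depth;
        }
        if (format < PixelFormat::MaxDepthStencilFormat) {
            return SurfaceType::DepthStencil;
        }
        return SurfaceType::Invalid;
    }

Note also that for block-compressed formats the bpp_table above stores bits per block,
not bits per texel: 64 for BC1/BC4 (8-byte blocks) and 128 for the other BCn formats
and all ASTC footprints (16-byte blocks).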
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index f476f03b0..7d5a75648 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -19,8 +19,6 @@ constexpr auto SNORM = ComponentType::SNORM;
constexpr auto UNORM = ComponentType::UNORM;
constexpr auto SINT = ComponentType::SINT;
constexpr auto UINT = ComponentType::UINT;
-constexpr auto SNORM_FORCE_FP16 = ComponentType::SNORM_FORCE_FP16;
-constexpr auto UNORM_FORCE_FP16 = ComponentType::UNORM_FORCE_FP16;
constexpr auto FLOAT = ComponentType::FLOAT;
constexpr bool C = false; // Normal color
constexpr bool S = true; // Srgb
@@ -41,119 +39,126 @@ struct Table {
ComponentType alpha_component;
bool is_srgb;
};
-constexpr std::array<Table, 78> DefinitionTable = {{
- {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
- {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
- {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
- {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA8_SRGB},
+constexpr std::array<Table, 86> DefinitionTable = {{
+ {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM},
+ {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM},
+ {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT},
+ {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT},
+ {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB},
- {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5U},
+ {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM},
- {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10U},
+ {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM},
+ {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT},
- {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5U},
+ {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM},
- {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R4G4B4A4U},
+ {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM},
- {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8U},
- {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8UI},
+ {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM},
+ {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM},
+ {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT},
+ {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT},
- {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U},
- {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S},
- {TextureFormat::G8R8, C, UINT, UINT, UINT, UINT, PixelFormat::RG8UI},
+ {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM},
+ {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM},
+ {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT},
+ {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT},
- {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S},
- {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U},
- {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F},
- {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI},
+ {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM},
+ {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM},
+ {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT},
+ {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT},
+ {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT},
- {TextureFormat::R16_G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG16F},
- {TextureFormat::R16_G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG16},
- {TextureFormat::R16_G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG16S},
- {TextureFormat::R16_G16, C, UINT, UINT, UINT, UINT, PixelFormat::RG16UI},
- {TextureFormat::R16_G16, C, SINT, SINT, SINT, SINT, PixelFormat::RG16I},
+ {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT},
+ {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM},
+ {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM},
+ {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT},
+ {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT},
- {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16F},
- {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16U},
- {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16S},
- {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16UI},
- {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16I},
+ {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT},
+ {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM},
+ {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM},
+ {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT},
+ {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT},
- {TextureFormat::BF10GF11RF11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R11FG11FB10F},
+ {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT},
- {TextureFormat::R32_G32_B32_A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA32F},
- {TextureFormat::R32_G32_B32_A32, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA32UI},
+ {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT},
+ {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT},
+ {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT},
- {TextureFormat::R32_G32_B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGB32F},
+ {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT},
- {TextureFormat::R32_G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG32F},
- {TextureFormat::R32_G32, C, UINT, UINT, UINT, UINT, PixelFormat::RG32UI},
+ {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT},
+ {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT},
+ {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT},
- {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F},
- {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI},
- {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32I},
+ {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT},
+ {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT},
+ {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT},
- {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F},
+ {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT},
- {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
- {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
- {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
- {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
- {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8},
+ {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT},
+ {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM},
+ {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
+ {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
+ {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT},
- {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
- {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB},
+ {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM},
+ {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB},
- {TextureFormat::DXT23, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23},
- {TextureFormat::DXT23, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23_SRGB},
+ {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM},
+ {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB},
- {TextureFormat::DXT45, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45},
- {TextureFormat::DXT45, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45_SRGB},
+ {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM},
+ {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB},
- // TODO: Use a different pixel format for SNORM
- {TextureFormat::DXN1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN1},
- {TextureFormat::DXN1, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN1},
+ {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM},
+ {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM},
- {TextureFormat::DXN2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN2UNORM},
- {TextureFormat::DXN2, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN2SNORM},
+ {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM},
+ {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM},
- {TextureFormat::BC7U, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U},
- {TextureFormat::BC7U, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U_SRGB},
+ {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM},
+ {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB},
- {TextureFormat::BC6H_SF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SF16},
- {TextureFormat::BC6H_UF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UF16},
+ {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT},
+ {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT},
- {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4},
+ {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM},
{TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB},
- {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4},
+ {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM},
{TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB},
- {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5},
+ {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM},
{TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB},
- {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8},
+ {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM},
{TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB},
- {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5},
+ {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM},
{TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB},
- {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8},
+ {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM},
{TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB},
- {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6},
+ {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM},
{TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB},
- {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10},
+ {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM},
{TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB},
- {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12},
+ {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM},
{TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB},
- {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6},
+ {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM},
{TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB},
- {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5},
+ {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM},
{TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB},
}};
@@ -184,7 +189,7 @@ PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb
static_cast<int>(format), is_srgb, static_cast<int>(red_component),
static_cast<int>(green_component), static_cast<int>(blue_component),
static_cast<int>(alpha_component));
- return PixelFormat::ABGR8U;
+ return PixelFormat::A8B8G8R8_UNORM;
}
void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component,
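
DefinitionTable grows from 78 to 86 rows, mostly to add the previously missing SINT
variants. Each row keys a (texture format, sRGB flag, four component types) tuple to
one PixelFormat; below is a linear-search sketch of that mapping, for illustration
only: the real FormatLookupTable precomputes an indexed table through Set(), and the
texture_format/pixel_format field names are assumptions extrapolated from the visible
tail of the Table struct.

    PixelFormat LookupLinear(TextureFormat format, bool is_srgb, ComponentType red,
                             ComponentType green, ComponentType blue, ComponentType alpha) {
        for (const Table& row : DefinitionTable) {
            if (row.texture_format == format && row.is_srgb == is_srgb &&
                row.red_component == red && row.green_component == green &&
                row.blue_component == blue && row.alpha_component == alpha) {
                return row.pixel_format;
            }
        }
        return PixelFormat::A8B8G8R8_UNORM; // same fallback as GetPixelFormat above
    }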
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 715f39d0d..b44c09d71 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -115,17 +115,24 @@ std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
if (gpu_addr == candidate_gpu_addr) {
return {{0, 0}};
}
+
if (candidate_gpu_addr < gpu_addr) {
- return {};
+ return std::nullopt;
}
+
const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
const auto layer{static_cast<u32>(relative_address / layer_size)};
+ if (layer >= params.depth) {
+ return std::nullopt;
+ }
+
const GPUVAddr mipmap_address = relative_address - layer_size * layer;
const auto mipmap_it =
Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
if (mipmap_it == mipmap_offsets.end()) {
- return {};
+ return std::nullopt;
}
+
const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))};
return std::make_pair(layer, level);
}
@@ -225,7 +232,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
}
}
- if (!is_converted && params.pixel_format != PixelFormat::S8Z24) {
+ if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) {
return;
}
@@ -248,12 +255,11 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
// Use an extra temporal buffer
auto& tmp_buffer = staging_cache.GetBuffer(1);
- // Special case for 3D Texture Segments
- const bool must_read_current_data =
- params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D;
tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data();
- if (must_read_current_data) {
+
+ if (params.target == SurfaceTarget::Texture3D) {
+ // Special case for 3D texture segments
memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
}
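
The added layer bound check aside, GetLayerMipmap is pure address arithmetic. A worked
example with hypothetical numbers:

    // gpu_addr = 0x1000, layer_size = 0x800, per-layer level offsets {0x0, 0x600, 0x780}.
    constexpr u64 gpu_addr = 0x1000;
    constexpr u64 layer_size = 0x800;
    constexpr u64 candidate = gpu_addr + 2 * layer_size + 0x600; // 0x2600

    constexpr u64 relative = candidate - gpu_addr;                 // 0x1600
    constexpr u32 layer = static_cast<u32>(relative / layer_size); // 2
    constexpr u64 mip_addr = relative - layer_size * layer;        // 0x600
    // BinaryFind over {0x0, 0x600, 0x780} finds index 1, so the result is
    // {layer = 2, level = 1}; with the new check, layer >= params.depth now
    // returns std::nullopt instead of a bogus view.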
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 79e10ffbb..173f2edba 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -217,8 +217,8 @@ public:
}
bool IsProtected() const {
- // Only 3D Slices are to be protected
- return is_target && params.block_depth > 0;
+ // Only 3D slices are to be protected
+ return is_target && params.target == SurfaceTarget::Texture3D;
}
bool IsRenderTarget() const {
@@ -250,6 +250,11 @@ public:
return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
}
+ TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
+ return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth,
+ base_level, num_levels));
+ }
+
std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
const GPUVAddr view_addr,
const std::size_t candidate_size, const u32 mipmap,
@@ -272,8 +277,8 @@ public:
std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
const std::size_t candidate_size) {
if (params.target == SurfaceTarget::Texture3D ||
- (params.num_levels == 1 && !params.is_layered) ||
- view_params.target == SurfaceTarget::Texture3D) {
+ view_params.target == SurfaceTarget::Texture3D ||
+ (params.num_levels == 1 && !params.is_layered)) {
return {};
}
const auto layer_mipmap{GetLayerMipmap(view_addr)};
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 884fabffe..e8515321b 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -74,21 +74,21 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
SurfaceParams params;
params.is_tiled = tic.IsTiled();
params.srgb_conversion = tic.IsSrgbConversionEnabled();
- params.block_width = params.is_tiled ? tic.BlockWidth() : 0,
- params.block_height = params.is_tiled ? tic.BlockHeight() : 0,
- params.block_depth = params.is_tiled ? tic.BlockDepth() : 0,
+ params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
+ params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
+ params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
params.pixel_format = lookup_table.GetPixelFormat(
tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
params.type = GetFormatType(params.pixel_format);
if (entry.is_shadow && params.type == SurfaceType::ColorTexture) {
switch (params.pixel_format) {
- case PixelFormat::R16U:
- case PixelFormat::R16F:
- params.pixel_format = PixelFormat::Z16;
+ case PixelFormat::R16_UNORM:
+ case PixelFormat::R16_FLOAT:
+ params.pixel_format = PixelFormat::D16_UNORM;
break;
- case PixelFormat::R32F:
- params.pixel_format = PixelFormat::Z32F;
+ case PixelFormat::R32_FLOAT:
+ params.pixel_format = PixelFormat::D32_FLOAT;
break;
default:
UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
@@ -96,7 +96,6 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
}
params.type = GetFormatType(params.pixel_format);
}
- params.type = GetFormatType(params.pixel_format);
// TODO: on 1DBuffer we should use the tic info.
if (tic.IsBuffer()) {
params.target = SurfaceTarget::TextureBuffer;
@@ -130,14 +129,13 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl
SurfaceParams params;
params.is_tiled = tic.IsTiled();
params.srgb_conversion = tic.IsSrgbConversionEnabled();
- params.block_width = params.is_tiled ? tic.BlockWidth() : 0,
- params.block_height = params.is_tiled ? tic.BlockHeight() : 0,
- params.block_depth = params.is_tiled ? tic.BlockDepth() : 0,
+ params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
+ params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
+ params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
params.pixel_format = lookup_table.GetPixelFormat(
tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
params.type = GetFormatType(params.pixel_format);
- params.type = GetFormatType(params.pixel_format);
params.target = ImageTypeToSurfaceTarget(entry.type);
// TODO: on 1DBuffer we should use the tic info.
if (tic.IsBuffer()) {
@@ -165,38 +163,40 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl
return params;
}
-SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) {
- const auto& regs = system.GPU().Maxwell3D().regs;
- SurfaceParams params;
- params.is_tiled = regs.zeta.memory_layout.type ==
- Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
- params.srgb_conversion = false;
- params.block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U);
- params.block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U);
- params.block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
- params.tile_width_spacing = 1;
- params.pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
- params.type = GetFormatType(params.pixel_format);
- params.width = regs.zeta_width;
- params.height = regs.zeta_height;
- params.pitch = 0;
- params.num_levels = 1;
- params.emulated_levels = 1;
-
- const bool is_layered = regs.zeta_layers > 1 && params.block_depth == 0;
- params.is_layered = is_layered;
- params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
- params.depth = is_layered ? regs.zeta_layers.Value() : 1U;
- return params;
+SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) {
+ const auto& regs = maxwell3d.regs;
+ const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
+ const bool is_layered = regs.zeta_layers > 1 && block_depth == 0;
+ const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
+ return {
+ .is_tiled = regs.zeta.memory_layout.type ==
+ Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear,
+ .srgb_conversion = false,
+ .is_layered = is_layered,
+ .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U),
+ .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U),
+ .block_depth = block_depth,
+ .tile_width_spacing = 1,
+ .width = regs.zeta_width,
+ .height = regs.zeta_height,
+ .depth = is_layered ? regs.zeta_layers.Value() : 1U,
+ .pitch = 0,
+ .num_levels = 1,
+ .emulated_levels = 1,
+ .pixel_format = pixel_format,
+ .type = GetFormatType(pixel_format),
+ .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D,
+ };
}
-SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
- const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
+SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
+ std::size_t index) {
+ const auto& config{maxwell3d.regs.rt[index]};
SurfaceParams params;
params.is_tiled =
config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
- params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
- config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
+ params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
+ config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB;
params.block_width = config.memory_layout.block_width;
params.block_height = config.memory_layout.block_height;
params.block_depth = config.memory_layout.block_depth;
@@ -215,33 +215,47 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
params.num_levels = 1;
params.emulated_levels = 1;
- const bool is_layered = config.layers > 1 && params.block_depth == 0;
- params.is_layered = is_layered;
- params.depth = is_layered ? config.layers.Value() : 1;
- params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
+ if (config.memory_layout.is_3d != 0) {
+ params.depth = config.layers.Value();
+ params.is_layered = false;
+ params.target = SurfaceTarget::Texture3D;
+ } else if (config.layers > 1) {
+ params.depth = config.layers.Value();
+ params.is_layered = true;
+ params.target = SurfaceTarget::Texture2DArray;
+ } else {
+ params.depth = 1;
+ params.is_layered = false;
+ params.target = SurfaceTarget::Texture2D;
+ }
return params;
}
SurfaceParams SurfaceParams::CreateForFermiCopySurface(
const Tegra::Engines::Fermi2D::Regs::Surface& config) {
- SurfaceParams params{};
- params.is_tiled = !config.linear;
- params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
- config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
- params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 5U) : 0,
- params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0,
- params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0,
- params.tile_width_spacing = 1;
- params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
- params.type = GetFormatType(params.pixel_format);
- params.width = config.width;
- params.height = config.height;
- params.pitch = config.pitch;
- // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
- params.target = SurfaceTarget::Texture2D;
- params.depth = 1;
- params.num_levels = 1;
- params.emulated_levels = 1;
+ const bool is_tiled = !config.linear;
+ const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+
+ SurfaceParams params{
+ .is_tiled = is_tiled,
+ .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
+ config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB,
+ .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U,
+ .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U,
+ .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U,
+ .tile_width_spacing = 1,
+ .width = config.width,
+ .height = config.height,
+ .depth = 1,
+ .pitch = config.pitch,
+ .num_levels = 1,
+ .emulated_levels = 1,
+ .pixel_format = pixel_format,
+ .type = GetFormatType(pixel_format),
+ // TODO(Rodrigo): Try to guess texture arrays from parameters
+ .target = SurfaceTarget::Texture2D,
+ };
+
params.is_layered = params.IsLayered();
return params;
}
@@ -334,8 +348,7 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co
size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
}
if (is_tiled && is_layered) {
- return Common::AlignBits(size,
- Tegra::Texture::GetGOBSizeShift() + block_height + block_depth);
+ return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
}
return size;
}
@@ -409,7 +422,7 @@ std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const {
const u32 block_size = GetBlockSize();
const u32 block_index = offset / block_size;
const u32 gob_offset = offset % block_size;
- const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GetGOBSize());
+ const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE);
const u32 x_gob_pixels = 64U / GetBytesPerPixel();
const u32 x_block_pixels = x_gob_pixels << block_width;
const u32 y_block_pixels = 8U << block_height;
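As context for the AlignBits calls above: aligning to GOB_SIZE_SHIFT + block_height + block_depth rounds a layer size up to a whole block of GOBs. A minimal sketch, with Common::AlignBits reimplemented here purely for illustration (the real helper lives in src/common/alignment.h):

#include <cstdint>

// Illustrative stand-in for Common::AlignBits: round value up to a multiple of (1 << bits).
constexpr std::uint64_t AlignBits(std::uint64_t value, std::uint32_t bits) {
    return (value + ((1ULL << bits) - 1)) >> bits << bits;
}

// With GOB_SIZE_SHIFT = 9 (512-byte GOBs), block_height = 4 and block_depth = 0,
// layer sizes are padded to 1 << (9 + 4) = 8192 bytes:
static_assert(AlignBits(5000, 9 + 4 + 0) == 8192);
static_assert(AlignBits(8192, 9 + 4 + 0) == 8192);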
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 24957df8d..4466c3c34 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -33,10 +33,11 @@ public:
const VideoCommon::Shader::Image& entry);
/// Creates SurfaceParams for a depth buffer configuration.
- static SurfaceParams CreateForDepthBuffer(Core::System& system);
+ static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d);
/// Creates SurfaceParams from a framebuffer configuration.
- static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
+ static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
+ std::size_t index);
/// Creates SurfaceParams from a Fermi2D surface configuration.
static SurfaceParams CreateForFermiCopySurface(
@@ -204,7 +205,7 @@ public:
static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height,
const u32 block_depth) {
return Common::AlignBits(out_size,
- Tegra::Texture::GetGOBSizeShift() + block_height + block_depth);
+ Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
}
/// Converts a width from a type of surface into another. This helps represent the
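The declaration changes above follow the dependency-injection pattern used throughout this change set: the factories now take the engine they read as an explicit parameter instead of locating it through Core::System. A hedged sketch of the resulting call-site shape, assuming the Tegra::GPU::Maxwell3D() accessor used elsewhere in the codebase:

// The caller passes the engine it already holds; the factory no longer pulls
// global state out of Core::System.
Tegra::Engines::Maxwell3D& maxwell3d = gpu.Maxwell3D();
const auto depth_params = SurfaceParams::CreateForDepthBuffer(maxwell3d);
const auto rt_params = SurfaceParams::CreateForFramebuffer(maxwell3d, /*index=*/0);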
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 6f63217a2..ea835c59f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -24,6 +24,7 @@
#include "core/core.h"
#include "core/memory.h"
#include "core/settings.h"
+#include "video_core/compatible_formats.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
@@ -47,8 +48,8 @@ class RasterizerInterface;
namespace VideoCommon {
+using VideoCore::Surface::FormatCompatibility;
using VideoCore::Surface::PixelFormat;
-
using VideoCore::Surface::SurfaceTarget;
using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
@@ -134,8 +135,7 @@ public:
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
@@ -159,8 +159,7 @@ public:
if (!gpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
@@ -182,11 +181,11 @@ public:
TView GetDepthBufferSurface(bool preserve_contents) {
std::lock_guard lock{mutex};
- auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
+ auto& dirty = maxwell3d.dirty;
+ if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
return depth_buffer.view;
}
- maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
+ dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
const auto& regs{maxwell3d.regs};
const auto gpu_addr{regs.zeta.Address()};
@@ -194,13 +193,12 @@ public:
SetEmptyDepthBuffer();
return {};
}
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
SetEmptyDepthBuffer();
return {};
}
- const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
+ const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)};
auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
@@ -214,7 +212,6 @@ public:
TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
std::lock_guard lock{mutex};
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
- auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
return render_targets[index].view;
}
@@ -234,21 +231,20 @@ public:
return {};
}
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
SetEmptyColorBuffer(index);
return {};
}
auto surface_view =
- GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
+ GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index),
preserve_contents, true);
if (render_targets[index].target) {
auto& surface = render_targets[index].target;
surface->MarkAsRenderTarget(false, NO_RT);
const auto& cr_params = surface->GetSurfaceParams();
- if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation) {
+ if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
AsyncFlushSurface(surface);
}
}
@@ -298,15 +294,12 @@ public:
const GPUVAddr src_gpu_addr = src_config.Address();
const GPUVAddr dst_gpu_addr = dst_config.Address();
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
- const std::optional<VAddr> dst_cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
- const std::optional<VAddr> src_cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
- std::pair<TSurface, TView> dst_surface =
- GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
- std::pair<TSurface, TView> src_surface =
- GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
- ImageBlit(src_surface.second, dst_surface.second, copy_config);
+
+ const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr);
+ const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr);
+ std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
+ TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
+ ImageBlit(src_surface, dst_surface.second, copy_config);
dst_surface.first->MarkAsModified(true, Tick());
}
@@ -359,9 +352,11 @@ public:
}
protected:
- explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- bool is_astc_supported)
- : system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} {
+ explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
+ bool is_astc_supported_)
+ : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+ gpu_memory{gpu_memory_} {
for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
SetEmptyColorBuffer(i);
}
@@ -374,9 +369,9 @@ protected:
siblings_table[static_cast<std::size_t>(b)] = a;
};
std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
- make_siblings(PixelFormat::Z16, PixelFormat::R16U);
- make_siblings(PixelFormat::Z32F, PixelFormat::R32F);
- make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F);
+ make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM);
+ make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT);
+ make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT);
sampled_textures.reserve(64);
}
@@ -396,7 +391,7 @@ protected:
virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
void ManageRenderTargetUnregister(TSurface& surface) {
- auto& dirty = system.GPU().Maxwell3D().dirty;
+ auto& dirty = maxwell3d.dirty;
const u32 index = surface->GetRenderTarget();
if (index == DEPTH_RT) {
dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true;
@@ -409,8 +404,7 @@ protected:
void Register(TSurface surface) {
const GPUVAddr gpu_addr = surface->GetGpuAddr();
const std::size_t size = surface->GetSizeInBytes();
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
gpu_addr);
@@ -460,7 +454,6 @@ protected:
return new_surface;
}
- Core::System& system;
const bool is_astc_supported;
private:
@@ -508,12 +501,12 @@ private:
return RecycleStrategy::Flush;
}
// 3D Textures decision
- if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
+ if (params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
}
for (const auto& s : overlaps) {
const auto& s_params = s->GetSurfaceParams();
- if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
+ if (s_params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
}
}
@@ -597,7 +590,7 @@ private:
} else {
new_surface = GetUncachedSurface(gpu_addr, params);
}
- const auto& final_params = new_surface->GetSurfaceParams();
+ const SurfaceParams& final_params = new_surface->GetSurfaceParams();
if (cr_params.type != final_params.type) {
if (Settings::IsGPULevelExtreme()) {
BufferCopy(current_surface, new_surface);
@@ -605,7 +598,7 @@ private:
} else {
std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
for (auto& brick : bricks) {
- ImageCopy(current_surface, new_surface, brick);
+ TryCopyImage(current_surface, new_surface, brick);
}
}
Unregister(current_surface);
@@ -696,7 +689,7 @@ private:
}
const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,
src_params.depth);
- ImageCopy(surface, new_surface, copy_params);
+ TryCopyImage(surface, new_surface, copy_params);
}
}
if (passed_tests == 0) {
@@ -731,51 +724,9 @@ private:
*/
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
const SurfaceParams& params,
- const GPUVAddr gpu_addr,
- const VAddr cpu_addr,
+ GPUVAddr gpu_addr, VAddr cpu_addr,
bool preserve_contents) {
- if (params.target == SurfaceTarget::Texture3D) {
- bool failed = false;
- if (params.num_levels > 1) {
- // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
- return std::nullopt;
- }
- TSurface new_surface = GetUncachedSurface(gpu_addr, params);
- bool modified = false;
- for (auto& surface : overlaps) {
- const SurfaceParams& src_params = surface->GetSurfaceParams();
- if (src_params.target != SurfaceTarget::Texture2D) {
- failed = true;
- break;
- }
- if (src_params.height != params.height) {
- failed = true;
- break;
- }
- if (src_params.block_depth != params.block_depth ||
- src_params.block_height != params.block_height) {
- failed = true;
- break;
- }
- const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
- const auto offsets = params.GetBlockOffsetXYZ(offset);
- const auto z = std::get<2>(offsets);
- modified |= surface->IsModified();
- const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
- 1);
- ImageCopy(surface, new_surface, copy_params);
- }
- if (failed) {
- return std::nullopt;
- }
- for (const auto& surface : overlaps) {
- Unregister(surface);
- }
- new_surface->MarkAsModified(modified, Tick());
- Register(new_surface);
- auto view = new_surface->GetMainView();
- return {{std::move(new_surface), view}};
- } else {
+ if (params.target != SurfaceTarget::Texture3D) {
for (const auto& surface : overlaps) {
if (!surface->MatchTarget(params.target)) {
if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
@@ -791,11 +742,60 @@ private:
continue;
}
if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
- return {{surface, surface->GetMainView()}};
+ return std::make_pair(surface, surface->GetMainView());
}
}
return InitializeSurface(gpu_addr, params, preserve_contents);
}
+
+ if (params.num_levels > 1) {
+        // We can't handle mipmaps in 3D textures yet; fall back to the LLE approach
+ return std::nullopt;
+ }
+
+ if (overlaps.size() == 1) {
+ const auto& surface = overlaps[0];
+ const SurfaceParams& overlap_params = surface->GetSurfaceParams();
+ // Don't attempt to render to textures with more than one level for now
+            // The texture has to be to the right of, or at, the same address if we want to render to it
+ if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
+ const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
+ const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
+ if (slice < overlap_params.depth) {
+ auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
+ return std::make_pair(std::move(surface), std::move(view));
+ }
+ }
+ }
+
+ TSurface new_surface = GetUncachedSurface(gpu_addr, params);
+ bool modified = false;
+
+ for (auto& surface : overlaps) {
+ const SurfaceParams& src_params = surface->GetSurfaceParams();
+ if (src_params.target != SurfaceTarget::Texture2D ||
+ src_params.height != params.height ||
+ src_params.block_depth != params.block_depth ||
+ src_params.block_height != params.block_height) {
+ return std::nullopt;
+ }
+ modified |= surface->IsModified();
+
+ const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
+ const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
+ const u32 width = params.width;
+ const u32 height = params.height;
+ const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
+ TryCopyImage(surface, new_surface, copy_params);
+ }
+ for (const auto& surface : overlaps) {
+ Unregister(surface);
+ }
+ new_surface->MarkAsModified(modified, Tick());
+ Register(new_surface);
+
+ TView view = new_surface->GetMainView();
+ return std::make_pair(std::move(new_surface), std::move(view));
}
/**
@@ -873,7 +873,7 @@ private:
}
}
- // Check if it's a 3D texture
+ // Manage 3D textures
if (params.block_depth > 0) {
auto surface =
Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
@@ -948,8 +948,7 @@ private:
* @param params The parameters on the candidate surface.
**/
Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
Deduction result{};
@@ -1025,7 +1024,7 @@ private:
params.pitch = 4;
params.num_levels = 1;
params.emulated_levels = 1;
- params.pixel_format = VideoCore::Surface::PixelFormat::R8U;
+ params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM;
params.type = VideoCore::Surface::SurfaceType::ColorTexture;
auto surface = CreateSurface(0ULL, params);
invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
@@ -1048,7 +1047,7 @@ private:
void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
- auto deduced_dst = DeduceSurface(src_gpu_addr, src_params);
+ auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
if (deduced_src.Failed() || deduced_dst.Failed()) {
return;
}
@@ -1106,7 +1105,7 @@ private:
void LoadSurface(const TSurface& surface) {
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
- surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache);
+ surface->LoadBuffer(gpu_memory, staging_cache);
surface->UploadTexture(staging_cache.GetBuffer(0));
surface->MarkAsModified(false, Tick());
}
@@ -1117,7 +1116,7 @@ private:
}
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
surface->DownloadTexture(staging_cache.GetBuffer(0));
- surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache);
+ surface->FlushBuffer(gpu_memory, staging_cache);
surface->MarkAsModified(false, Tick());
}
@@ -1187,6 +1186,19 @@ private:
return {};
}
+    /// Tries to do an image copy, logging an error when the formats are incompatible.
+ void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) {
+ const SurfaceParams& src_params = src->GetSurfaceParams();
+ const SurfaceParams& dst_params = dst->GetSurfaceParams();
+ if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) {
+ LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}",
+ static_cast<int>(dst_params.pixel_format),
+ static_cast<int>(src_params.pixel_format));
+ return;
+ }
+ ImageCopy(src, dst, copy);
+ }
+
constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
return siblings_table[static_cast<std::size_t>(format)];
}
@@ -1234,8 +1246,11 @@ private:
}
VideoCore::RasterizerInterface& rasterizer;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::MemoryManager& gpu_memory;
FormatLookupTable format_lookup_table;
+ FormatCompatibility format_compatibility;
u64 ticks{};
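The TryCopyImage helper introduced above turns format-incompatible copies into a logged no-op instead of an invalid host-API call. A minimal self-contained sketch of the same guard pattern, with a placeholder predicate standing in for FormatCompatibility::TestCopy (the real table is built in video_core/compatible_formats.cpp):

#include <cstdio>

enum class PixelFormat : int { A8B8G8R8_UNORM, B8G8R8A8_UNORM, D24_UNORM_S8_UINT };

// Placeholder: treat only identical formats as copy-compatible.
bool TestCopy(PixelFormat src, PixelFormat dst) {
    return src == dst;
}

void TryCopyImage(PixelFormat src, PixelFormat dst) {
    if (!TestCopy(src, dst)) {
        std::fprintf(stderr, "Illegal copy between formats={%d, %d}\n",
                     static_cast<int>(dst), static_cast<int>(src));
        return; // Drop the copy rather than corrupt the destination.
    }
    // ImageCopy(src_surface, dst_surface, copy_params) would run here.
}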
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
index f3efa7eb0..962921483 100644
--- a/src/video_core/textures/convert.cpp
+++ b/src/video_core/textures/convert.cpp
@@ -35,7 +35,7 @@ void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
S8Z24 s8z24_pixel{};
Z24S8 z24s8_pixel{};
constexpr auto bpp{
- VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
+ VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)};
for (std::size_t y = 0; y < height; ++y) {
for (std::size_t x = 0; x < width; ++x) {
const std::size_t offset{bpp * (y * width + x)};
@@ -73,7 +73,7 @@ void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format,
in_data, width, height, depth, block_width, block_height);
std::copy(rgba8_data.begin(), rgba8_data.end(), out_data);
- } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
+ } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height);
}
}
@@ -85,7 +85,7 @@ void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 h
static_cast<u32>(pixel_format));
UNREACHABLE();
- } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
+ } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
}
}
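For context on the S8_UINT_D24_UNORM paths touched above: each texel packs 24 depth bits and 8 stencil bits into one 32-bit word, and the guest-to-host conversion just moves the stencil byte from the top to the bottom of the word. A minimal sketch, assuming the S8Z24 layout used by the bit fields in this file (depth in bits 0..23, stencil in bits 24..31):

#include <cstdint>

// S8Z24: stencil in bits 24..31, depth in bits 0..23.
// Z24S8: depth in bits 8..31, stencil in bits 0..7.
constexpr std::uint32_t S8Z24ToZ24S8(std::uint32_t texel) {
    const std::uint32_t depth = texel & 0x00FFFFFF;
    const std::uint32_t stencil = texel >> 24;
    return (depth << 8) | stencil;
}

static_assert(S8Z24ToZ24S8(0xAB123456) == 0x123456ABU);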
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 548e4c3fe..16d46a018 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -6,6 +6,7 @@
#include <cstring>
#include "common/alignment.h"
#include "common/assert.h"
+#include "common/bit_util.h"
#include "video_core/gpu.h"
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"
@@ -37,20 +38,10 @@ struct alignas(64) SwizzleTable {
std::array<std::array<u16, M>, N> values{};
};
-constexpr u32 gob_size_x_shift = 6;
-constexpr u32 gob_size_y_shift = 3;
-constexpr u32 gob_size_z_shift = 0;
-constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift;
+constexpr u32 FAST_SWIZZLE_ALIGN = 16;
-constexpr u32 gob_size_x = 1U << gob_size_x_shift;
-constexpr u32 gob_size_y = 1U << gob_size_y_shift;
-constexpr u32 gob_size_z = 1U << gob_size_z_shift;
-constexpr u32 gob_size = 1U << gob_size_shift;
-
-constexpr u32 fast_swizzle_align = 16;
-
-constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>();
-constexpr auto fast_swizzle_table = SwizzleTable<gob_size_y, 4, fast_swizzle_align>();
+constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, GOB_SIZE_X, GOB_SIZE_Z>();
+constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>();
/**
 * This function manages ALL the GOBs (Groups of Bytes) inside a single block.
@@ -69,17 +60,17 @@ void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, con
u32 y_address = z_address;
u32 pixel_base = layer_z * z + y_start * stride_x;
for (u32 y = y_start; y < y_end; y++) {
- const auto& table = legacy_swizzle_table[y % gob_size_y];
+ const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
for (u32 x = x_start; x < x_end; x++) {
- const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % gob_size_x]};
+ const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]};
const u32 pixel_index{x * out_bytes_per_pixel + pixel_base};
data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
}
pixel_base += stride_x;
- if ((y + 1) % gob_size_y == 0)
- y_address += gob_size;
+ if ((y + 1) % GOB_SIZE_Y == 0)
+ y_address += GOB_SIZE;
}
z_address += xy_block_size;
}
@@ -104,18 +95,18 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
u32 y_address = z_address;
u32 pixel_base = layer_z * z + y_start * stride_x;
for (u32 y = y_start; y < y_end; y++) {
- const auto& table = fast_swizzle_table[y % gob_size_y];
- for (u32 xb = x_startb; xb < x_endb; xb += fast_swizzle_align) {
- const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
+ const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y];
+ for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) {
+ const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]};
const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
const u32 pixel_index{out_x + pixel_base};
data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
- std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
+ std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN);
}
pixel_base += stride_x;
- if ((y + 1) % gob_size_y == 0)
- y_address += gob_size;
+ if ((y + 1) % GOB_SIZE_Y == 0)
+ y_address += GOB_SIZE;
}
z_address += xy_block_size;
}
@@ -138,9 +129,9 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
const u32 stride_x = width * out_bytes_per_pixel;
const u32 layer_z = height * stride_x;
- const u32 gob_elements_x = gob_size_x / bytes_per_pixel;
- constexpr u32 gob_elements_y = gob_size_y;
- constexpr u32 gob_elements_z = gob_size_z;
+ const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel;
+ constexpr u32 gob_elements_y = GOB_SIZE_Y;
+ constexpr u32 gob_elements_z = GOB_SIZE_Z;
const u32 block_x_elements = gob_elements_x;
const u32 block_y_elements = gob_elements_y * block_height;
const u32 block_z_elements = gob_elements_z * block_depth;
@@ -148,7 +139,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements);
const u32 blocks_on_y = div_ceil(height, block_y_elements);
const u32 blocks_on_z = div_ceil(depth, block_z_elements);
- const u32 xy_block_size = gob_size * block_height;
+ const u32 xy_block_size = GOB_SIZE * block_height;
const u32 block_size = xy_block_size * block_depth;
u32 tile_offset = 0;
for (u32 zb = 0; zb < blocks_on_z; zb++) {
@@ -182,7 +173,7 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
const u32 block_height_size{1U << block_height};
const u32 block_depth_size{1U << block_depth};
- if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) {
+ if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) {
SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
bytes_per_pixel, out_bytes_per_pixel, block_height_size,
block_depth_size, width_spacing);
@@ -193,53 +184,6 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
}
}
-u32 BytesPerPixel(TextureFormat format) {
- switch (format) {
- case TextureFormat::DXT1:
- case TextureFormat::DXN1:
- // In this case a 'pixel' actually refers to a 4x4 tile.
- return 8;
- case TextureFormat::DXT23:
- case TextureFormat::DXT45:
- case TextureFormat::DXN2:
- case TextureFormat::BC7U:
- case TextureFormat::BC6H_UF16:
- case TextureFormat::BC6H_SF16:
- // In this case a 'pixel' actually refers to a 4x4 tile.
- return 16;
- case TextureFormat::R32_G32_B32:
- return 12;
- case TextureFormat::ASTC_2D_4X4:
- case TextureFormat::ASTC_2D_5X4:
- case TextureFormat::ASTC_2D_8X8:
- case TextureFormat::ASTC_2D_8X5:
- case TextureFormat::ASTC_2D_10X8:
- case TextureFormat::ASTC_2D_5X5:
- case TextureFormat::A8R8G8B8:
- case TextureFormat::A2B10G10R10:
- case TextureFormat::BF10GF11RF11:
- case TextureFormat::R32:
- case TextureFormat::R16_G16:
- return 4;
- case TextureFormat::A1B5G5R5:
- case TextureFormat::B5G6R5:
- case TextureFormat::G8R8:
- case TextureFormat::R16:
- return 2;
- case TextureFormat::R8:
- return 1;
- case TextureFormat::R16_G16_B16_A16:
- return 8;
- case TextureFormat::R32_G32_B32_A32:
- return 16;
- case TextureFormat::R32_G32:
- return 8;
- default:
- UNIMPLEMENTED_MSG("Format not implemented");
- return 1;
- }
-}
-
void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
u32 block_depth, u32 width_spacing) {
@@ -259,47 +203,82 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y,
}
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
+ u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data,
u32 block_height_bit, u32 offset_x, u32 offset_y) {
const u32 block_height = 1U << block_height_bit;
- const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
- gob_size_x};
+ const u32 image_width_in_gobs =
+ (swizzled_width * bytes_per_pixel + (GOB_SIZE_X - 1)) / GOB_SIZE_X;
for (u32 line = 0; line < subrect_height; ++line) {
const u32 dst_y = line + offset_y;
const u32 gob_address_y =
- (dst_y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
- ((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
- const auto& table = legacy_swizzle_table[dst_y % gob_size_y];
+ (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
+ ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
+ const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
for (u32 x = 0; x < subrect_width; ++x) {
const u32 dst_x = x + offset_x;
const u32 gob_address =
- gob_address_y + (dst_x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
- const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % gob_size_x];
- u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
- u8* dest_addr = swizzled_data + swizzled_offset;
+ gob_address_y + (dst_x * bytes_per_pixel / GOB_SIZE_X) * GOB_SIZE * block_height;
+ const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % GOB_SIZE_X];
+ const u32 unswizzled_offset = line * source_pitch + x * bytes_per_pixel;
+ const u8* const source_line = unswizzled_data + unswizzled_offset;
+ u8* const dest_addr = swizzled_data + swizzled_offset;
std::memcpy(dest_addr, source_line, bytes_per_pixel);
}
}
}
-void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
- u32 block_height_bit, u32 offset_x, u32 offset_y) {
- const u32 block_height = 1U << block_height_bit;
- for (u32 line = 0; line < subrect_height; ++line) {
- const u32 y2 = line + offset_y;
- const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
- ((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size;
- const auto& table = legacy_swizzle_table[y2 % gob_size_y];
- for (u32 x = 0; x < subrect_width; ++x) {
- const u32 x2 = (x + offset_x) * bytes_per_pixel;
- const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height;
- const u32 swizzled_offset = gob_address + table[x2 % gob_size_x];
- u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
- u8* source_addr = swizzled_data + swizzled_offset;
+void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
+ u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) {
+ const u32 stride = width * bytes_per_pixel;
+ const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
+ const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
+
+ const u32 block_height_mask = (1U << block_height) - 1;
+ const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height;
+
+ for (u32 line = 0; line < line_count; ++line) {
+ const u32 src_y = line + origin_y;
+ const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
+
+ const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
+ const u32 src_offset_y = (block_y >> block_height) * block_size +
+ ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
+ for (u32 column = 0; column < line_length_in; ++column) {
+ const u32 src_x = (column + origin_x) * bytes_per_pixel;
+ const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;
+
+ const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X];
+ const u32 unswizzled_offset = line * pitch + column * bytes_per_pixel;
- std::memcpy(dest_line, source_addr, bytes_per_pixel);
+ std::memcpy(output + unswizzled_offset, input + swizzled_offset, bytes_per_pixel);
+ }
+ }
+}
+
+void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
+ u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
+ u32 origin_y, u8* output, const u8* input) {
+ UNIMPLEMENTED_IF(origin_x > 0);
+ UNIMPLEMENTED_IF(origin_y > 0);
+
+ const u32 stride = width * bytes_per_pixel;
+ const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
+ const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
+
+ const u32 block_height_mask = (1U << block_height) - 1;
+ const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
+
+ for (u32 line = 0; line < line_count; ++line) {
+ const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y];
+ const u32 block_y = line / GOB_SIZE_Y;
+ const u32 dst_offset_y =
+ (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
+ for (u32 x = 0; x < line_length_in; ++x) {
+ const u32 dst_offset =
+ ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X];
+ const u32 src_offset = x * bytes_per_pixel + line * pitch;
+ std::memcpy(output + dst_offset, input + src_offset, bytes_per_pixel);
}
}
}
@@ -308,17 +287,17 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
const u32 block_height_bit, const std::size_t copy_size, const u8* source_data,
u8* swizzle_data) {
const u32 block_height = 1U << block_height_bit;
- const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x};
+ const u32 image_width_in_gobs{(width + GOB_SIZE_X - 1) / GOB_SIZE_X};
std::size_t count = 0;
for (std::size_t y = dst_y; y < height && count < copy_size; ++y) {
const std::size_t gob_address_y =
- (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
- ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
- const auto& table = legacy_swizzle_table[y % gob_size_y];
+ (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
+ ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
+ const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
const std::size_t gob_address =
- gob_address_y + (x / gob_size_x) * gob_size * block_height;
- const std::size_t swizzled_offset = gob_address + table[x % gob_size_x];
+ gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height;
+ const std::size_t swizzled_offset = gob_address + table[x % GOB_SIZE_X];
const u8* source_line = source_data + count;
u8* dest_addr = swizzle_data + swizzled_offset;
count++;
@@ -328,54 +307,12 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
}
}
-std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
- u32 height) {
- std::vector<u8> rgba_data;
-
- // TODO(Subv): Implement.
- switch (format) {
- case TextureFormat::DXT1:
- case TextureFormat::DXT23:
- case TextureFormat::DXT45:
- case TextureFormat::DXN1:
- case TextureFormat::DXN2:
- case TextureFormat::BC7U:
- case TextureFormat::BC6H_UF16:
- case TextureFormat::BC6H_SF16:
- case TextureFormat::ASTC_2D_4X4:
- case TextureFormat::ASTC_2D_8X8:
- case TextureFormat::ASTC_2D_5X5:
- case TextureFormat::ASTC_2D_10X8:
- case TextureFormat::A8R8G8B8:
- case TextureFormat::A2B10G10R10:
- case TextureFormat::A1B5G5R5:
- case TextureFormat::B5G6R5:
- case TextureFormat::R8:
- case TextureFormat::G8R8:
- case TextureFormat::BF10GF11RF11:
- case TextureFormat::R32_G32_B32_A32:
- case TextureFormat::R32_G32:
- case TextureFormat::R32:
- case TextureFormat::R16:
- case TextureFormat::R16_G16:
- case TextureFormat::R32_G32_B32:
- // TODO(Subv): For the time being just forward the same data without any decoding.
- rgba_data = texture_data;
- break;
- default:
- UNIMPLEMENTED_MSG("Format not implemented");
- break;
- }
-
- return rgba_data;
-}
-
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth) {
if (tiled) {
- const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift);
- const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height);
- const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth);
+ const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, GOB_SIZE_X_SHIFT);
+ const u32 aligned_height = Common::AlignBits(height, GOB_SIZE_Y_SHIFT + block_height);
+ const u32 aligned_depth = Common::AlignBits(depth, GOB_SIZE_Z_SHIFT + block_depth);
return aligned_width * aligned_height * aligned_depth;
} else {
return width * height * depth * bytes_per_pixel;
@@ -386,14 +323,14 @@ u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
u32 bytes_per_pixel) {
auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
const u32 gobs_in_block = 1 << block_height;
- const u32 y_blocks = gob_size_y << block_height;
- const u32 x_per_gob = gob_size_x / bytes_per_pixel;
+ const u32 y_blocks = GOB_SIZE_Y << block_height;
+ const u32 x_per_gob = GOB_SIZE_X / bytes_per_pixel;
const u32 x_blocks = div_ceil(width, x_per_gob);
- const u32 block_size = gob_size * gobs_in_block;
+ const u32 block_size = GOB_SIZE * gobs_in_block;
const u32 stride = block_size * x_blocks;
const u32 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size;
const u32 relative_y = dst_y % y_blocks;
- return base + (relative_y / gob_size_y) * gob_size;
+ return base + (relative_y / GOB_SIZE_Y) * GOB_SIZE;
}
} // namespace Tegra::Texture
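For readers following the table lookups above: within one 512-byte GOB (64 bytes wide, 8 rows), the block-linear layout interleaves 16-byte sectors, and LEGACY_SWIZZLE_TABLE caches this mapping per row. A hedged standalone sketch of the per-byte formula as commonly documented for Maxwell-family GOBs; verify against the SwizzleTable generator before relying on the exact constants:

#include <cstdint>

// Byte offset inside a single GOB for byte coordinates (x, y), 0 <= x < 64, 0 <= y < 8.
constexpr std::uint32_t GobOffset(std::uint32_t x, std::uint32_t y) {
    return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
           ((x % 32) / 16) * 32 + (y % 2) * 16 + (x % 16);
}

static_assert(GobOffset(0, 0) == 0);    // first sector
static_assert(GobOffset(16, 0) == 32);  // next 16-byte sector on the same row
static_assert(GobOffset(0, 1) == 16);   // odd rows start 16 bytes in
static_assert(GobOffset(63, 7) == 511); // last byte of the 512-byte GOB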
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 06f3ebf87..01e156bc8 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -10,15 +10,15 @@
namespace Tegra::Texture {
-// GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents
-// an small rect of (64/bytes_per_pixel)X8.
-inline std::size_t GetGOBSize() {
- return 512;
-}
+constexpr u32 GOB_SIZE_X = 64;
+constexpr u32 GOB_SIZE_Y = 8;
+constexpr u32 GOB_SIZE_Z = 1;
+constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
-inline std::size_t GetGOBSizeShift() {
- return 9;
-}
+constexpr std::size_t GOB_SIZE_X_SHIFT = 6;
+constexpr std::size_t GOB_SIZE_Y_SHIFT = 3;
+constexpr std::size_t GOB_SIZE_Z_SHIFT = 0;
+constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
/// Unswizzles a swizzled texture without changing its format.
void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
@@ -38,23 +38,36 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
-/// Decodes an unswizzled texture into a A8R8G8B8 texture.
-std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
- u32 height);
-
/// This function calculates the correct size of a texture depending if it's tiled or not.
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth);
/// Copies an untiled subrectangle into a tiled surface.
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
- u32 offset_x, u32 offset_y);
+ u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data,
+ u32 block_height_bit, u32 offset_x, u32 offset_y);
/// Copies a tiled subrectangle into a linear surface.
-void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
- u32 offset_x, u32 offset_y);
+void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
+ u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input);
+
+/// @brief Swizzles a 2D array of pixels into a 3D texture
+/// @param line_length_in Number of pixels per line
+/// @param line_count Number of lines
+/// @param pitch Number of bytes per line
+/// @param width Width of the swizzled texture
+/// @param height Height of the swizzled texture
+/// @param bytes_per_pixel Number of bytes used per pixel
+/// @param block_height Block height shift
+/// @param block_depth Block depth shift
+/// @param origin_x Column offset in pixels of the swizzled texture
+/// @param origin_y Row offset in pixels of the swizzled texture
+/// @param output Pointer to the pixels of the swizzled texture
+/// @param input Pointer to the 2D array of pixels used as input
+/// @pre input and output point to arrays large enough to hold the number of bytes used
+void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
+ u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
+ u32 origin_y, u8* output, const u8* input);
void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
std::size_t copy_size, const u8* source_data, u8* swizzle_data);
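A hedged usage sketch for the new SwizzleSliceToVoxel entry point, wiring the documented parameters together. The values are chosen so the destination is exactly one GOB (block_height = block_depth = 0, 1 byte per pixel); buffer contents are illustrative only:

#include <cstdint>
#include <vector>

#include "video_core/textures/decoders.h"

void SwizzleOneGobSlice() {
    // One 64x8 slice at 1 byte per pixel fills a single 512-byte GOB.
    std::vector<std::uint8_t> linear(64 * 8);
    std::vector<std::uint8_t> tiled(Tegra::Texture::GOB_SIZE);
    Tegra::Texture::SwizzleSliceToVoxel(/*line_length_in=*/64, /*line_count=*/8,
                                        /*pitch=*/64, /*width=*/64, /*height=*/8,
                                        /*bytes_per_pixel=*/1, /*block_height=*/0,
                                        /*block_depth=*/0, /*origin_x=*/0,
                                        /*origin_y=*/0, tiled.data(), linear.data());
}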
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index d1939d744..4171e3ef2 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -48,7 +48,7 @@ constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
};
unsigned SettingsMinimumAnisotropy() noexcept {
- switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) {
+ switch (static_cast<Anisotropy>(Settings::values.max_anisotropy.GetValue())) {
default:
case Anisotropy::Default:
return 1U;
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index eba05aced..0574fef12 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -12,10 +12,10 @@
namespace Tegra::Texture {
enum class TextureFormat : u32 {
- R32_G32_B32_A32 = 0x01,
- R32_G32_B32 = 0x02,
- R16_G16_B16_A16 = 0x03,
- R32_G32 = 0x04,
+ R32G32B32A32 = 0x01,
+ R32G32B32 = 0x02,
+ R16G16B16A16 = 0x03,
+ R32G32 = 0x04,
R32_B24G8 = 0x05,
ETC2_RGB = 0x06,
X8B8G8R8 = 0x07,
@@ -23,19 +23,19 @@ enum class TextureFormat : u32 {
A2B10G10R10 = 0x09,
ETC2_RGB_PTA = 0x0a,
ETC2_RGBA = 0x0b,
- R16_G16 = 0x0c,
- G8R24 = 0x0d,
- G24R8 = 0x0e,
+ R16G16 = 0x0c,
+ R24G8 = 0x0d,
+ R8G24 = 0x0e,
R32 = 0x0f,
- BC6H_SF16 = 0x10,
- BC6H_UF16 = 0x11,
+ BC6H_SFLOAT = 0x10,
+ BC6H_UFLOAT = 0x11,
A4B4G4R4 = 0x12,
A5B5G5R1 = 0x13,
A1B5G5R5 = 0x14,
B5G6R5 = 0x15,
B6G5R5 = 0x16,
- BC7U = 0x17,
- G8R8 = 0x18,
+ BC7 = 0x17,
+ R8G8 = 0x18,
EAC = 0x19,
EACX2 = 0x1a,
R16 = 0x1b,
@@ -43,23 +43,23 @@ enum class TextureFormat : u32 {
R8 = 0x1d,
G4R4 = 0x1e,
R1 = 0x1f,
- E5B9G9R9_SHAREDEXP = 0x20,
- BF10GF11RF11 = 0x21,
+ E5B9G9R9 = 0x20,
+ B10G11R11 = 0x21,
G8B8G8R8 = 0x22,
B8G8R8G8 = 0x23,
- DXT1 = 0x24,
- DXT23 = 0x25,
- DXT45 = 0x26,
- DXN1 = 0x27,
- DXN2 = 0x28,
- S8Z24 = 0x29,
+ BC1_RGBA = 0x24,
+ BC2 = 0x25,
+ BC3 = 0x26,
+ BC4 = 0x27,
+ BC5 = 0x28,
+ S8D24 = 0x29,
X8Z24 = 0x2a,
- Z24S8 = 0x2b,
+ D24S8 = 0x2b,
X4V4Z24__COV4R4V = 0x2c,
X4V4Z24__COV8R8V = 0x2d,
V8Z24__COV4R12V = 0x2e,
- ZF32 = 0x2f,
- ZF32_X24S8 = 0x30,
+ D32 = 0x2f,
+ D32S8 = 0x30,
X8Z24_X20V4S8__COV4R4V = 0x31,
X8Z24_X20V4S8__COV8R8V = 0x32,
ZF32_X20V4X8__COV4R4V = 0x33,
@@ -69,7 +69,7 @@ enum class TextureFormat : u32 {
X8Z24_X16V8S8__COV4R12V = 0x37,
ZF32_X16V8X8__COV4R12V = 0x38,
ZF32_X16V8S8__COV4R12V = 0x39,
- Z16 = 0x3a,
+ D16 = 0x3a,
V8Z24__COV8R24V = 0x3b,
X8Z24_X16V8S8__COV8R24V = 0x3c,
ZF32_X16V8X8__COV8R24V = 0x3d,
@@ -375,7 +375,4 @@ struct FullTextureInfo {
TSCEntry tsc;
};
-/// Returns the number of bytes per pixel of the input texture format.
-u32 BytesPerPixel(TextureFormat format);
-
} // namespace Tegra::Texture
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index f60bdc60a..a14df06a3 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <memory>
+
#include "common/logging/log.h"
#include "core/core.h"
#include "core/settings.h"
@@ -16,43 +17,55 @@
#include "video_core/video_core.h"
namespace {
-std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
- Core::System& system,
- Core::Frontend::GraphicsContext& context) {
- switch (Settings::values.renderer_backend) {
+
+std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
+ Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context) {
+ auto& telemetry_session = system.TelemetrySession();
+ auto& cpu_memory = system.Memory();
+
+ switch (Settings::values.renderer_backend.GetValue()) {
case Settings::RendererBackend::OpenGL:
- return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system, context);
+ return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory,
+ gpu, std::move(context));
#ifdef HAS_VULKAN
case Settings::RendererBackend::Vulkan:
- return std::make_unique<Vulkan::RendererVulkan>(emu_window, system);
+ return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory,
+ gpu, std::move(context));
#endif
default:
return nullptr;
}
}
+
} // Anonymous namespace
namespace VideoCore {
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
+ std::unique_ptr<Tegra::GPU> gpu;
+ if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
+ gpu = std::make_unique<VideoCommon::GPUAsynch>(system);
+ } else {
+ gpu = std::make_unique<VideoCommon::GPUSynch>(system);
+ }
+
auto context = emu_window.CreateSharedContext();
const auto scope = context->Acquire();
- auto renderer = CreateRenderer(emu_window, system, *context);
+
+ auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
if (!renderer->Init()) {
return nullptr;
}
- if (Settings::values.use_asynchronous_gpu_emulation) {
- return std::make_unique<VideoCommon::GPUAsynch>(system, std::move(renderer),
- std::move(context));
- }
- return std::make_unique<VideoCommon::GPUSynch>(system, std::move(renderer), std::move(context));
+ gpu->BindRenderer(std::move(renderer));
+ return gpu;
}
u16 GetResolutionScaleFactor(const RendererBase& renderer) {
return static_cast<u16>(
- Settings::values.resolution_factor != 0
- ? Settings::values.resolution_factor
+ Settings::values.resolution_factor.GetValue() != 0
+ ? Settings::values.resolution_factor.GetValue()
: renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio());
}