diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 4 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 4 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_buffer_cache.h | 6 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 31 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 10 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_update_descriptor.h | 28 | ||||
-rw-r--r-- | src/video_core/shader/decode/memory.cpp | 1 | ||||
-rw-r--r-- | src/video_core/shader/decode/texture.cpp | 11 |
11 files changed, 74 insertions, 34 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 48d5c4a5e..1ae5f1d62 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -239,6 +239,7 @@ Device::Device() { has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); + has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" @@ -275,6 +276,7 @@ Device::Device(std::nullptr_t) { has_image_load_formatted = true; has_texture_shadow_lod = true; has_variable_aoffi = true; + has_depth_buffer_float = true; } bool Device::TestVariableAoffi() { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ee053776d..f24bd0c7b 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -122,6 +122,10 @@ public: return use_driver_cache; } + bool HasDepthBufferFloat() const { + return has_depth_buffer_float; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -150,6 +154,7 @@ private: bool use_assembly_shaders{}; bool use_asynchronous_shaders{}; bool use_driver_cache{}; + bool has_depth_buffer_float{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 418644108..4610fd160 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -889,7 +889,11 @@ void RasterizerOpenGL::SyncViewport() { const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z; const GLdouble far_depth = src.translate_z + src.scale_z; - glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth); + if (device.HasDepthBufferFloat()) { + glDepthRangeIndexeddNV(static_cast<GLuint>(i), near_depth, far_depth); + } else { + glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth); + } if (!GLAD_GL_NV_viewport_swizzle) { continue; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 529570ff0..5cf7cd151 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -335,6 +335,10 @@ void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop const VideoCore::DiskResourceLoadCallback& callback) { disk_cache.BindTitleID(title_id); const std::optional transferable = disk_cache.LoadTransferable(); + + LOG_INFO(Render_OpenGL, "Total Shader Count: {}", + transferable.has_value() ? transferable->size() : 0); + if (!transferable) { return; } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 848eedd66..668633e7b 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -201,10 +201,6 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, }); } -void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) { - update_descriptor_queue.AddBuffer(buffer, offset, size); -} - void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) { if (num_indices <= current_num_indices) { return; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 041e6515c..982e92191 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -8,6 +8,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -16,7 +17,6 @@ namespace Vulkan { class Device; class VKDescriptorPool; class VKScheduler; -class VKUpdateDescriptorQueue; class BufferCacheRuntime; @@ -86,7 +86,9 @@ public: } private: - void BindBuffer(VkBuffer buffer, u32 offset, u32 size); + void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { + update_descriptor_queue.AddBuffer(buffer, offset, size); + } void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 40e2e0d38..c6846d886 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1845,13 +1845,21 @@ private: Expression TextureGather(Operation operation) { const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); const Id coords = GetCoordinates(operation, Type::Float); + + spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; + std::vector<Id> operands; Id texture{}; + + if (!meta.aoffi.empty()) { + mask = mask | spv::ImageOperandsMask::Offset; + operands.push_back(GetOffsetCoordinates(operation)); + } + if (meta.sampler.is_shadow) { texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords, - AsFloat(Visit(meta.depth_compare))); + AsFloat(Visit(meta.depth_compare)), mask, operands); } else { u32 component_value = 0; if (meta.component) { @@ -1860,7 +1868,7 @@ private: component_value = component->GetValue(); } texture = OpImageGather(t_float4, GetTextureSampler(operation), coords, - Constant(t_uint, component_value)); + Constant(t_uint, component_value), mask, operands); } return GetTextureElement(operation, texture, Type::Float); } @@ -1928,13 +1936,22 @@ private: const Id image = GetTextureImage(operation); const Id coords = GetCoordinates(operation, Type::Int); + + spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; + std::vector<Id> operands; Id fetch; + if (meta.lod && !meta.sampler.is_buffer) { - fetch = OpImageFetch(t_float4, image, coords, spv::ImageOperandsMask::Lod, - AsInt(Visit(meta.lod))); - } else { - fetch = OpImageFetch(t_float4, image, coords); + mask = mask | spv::ImageOperandsMask::Lod; + operands.push_back(AsInt(Visit(meta.lod))); + } + + if (!meta.aoffi.empty()) { + mask = mask | spv::ImageOperandsMask::Offset; + operands.push_back(GetOffsetCoordinates(operation)); } + + fetch = OpImageFetch(t_float4, image, coords, mask, operands); return GetTextureElement(operation, fetch, Type::Float); } diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index f99273c6a..dc45fdcb1 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -20,20 +20,20 @@ VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKSchedu VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; void VKUpdateDescriptorQueue::TickFrame() { - payload.clear(); + payload_cursor = payload.data(); } void VKUpdateDescriptorQueue::Acquire() { // Minimum number of entries required. // This is the maximum number of entries a single draw call migth use. - static constexpr std::size_t MIN_ENTRIES = 0x400; + static constexpr size_t MIN_ENTRIES = 0x400; - if (payload.size() + MIN_ENTRIES >= payload.max_size()) { + if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) { LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); scheduler.WaitWorker(); - payload.clear(); + payload_cursor = payload.data(); } - upload_start = &*payload.end(); + upload_start = payload_cursor; } void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index e214f7195..d35e77c44 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -4,8 +4,7 @@ #pragma once -#include <variant> -#include <boost/container/static_vector.hpp> +#include <array> #include "common/common_types.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -16,13 +15,15 @@ class Device; class VKScheduler; struct DescriptorUpdateEntry { - DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {} + struct Empty {}; + DescriptorUpdateEntry() = default; + DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {} DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {} - DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {} union { + Empty empty{}; VkDescriptorImageInfo image; VkDescriptorBufferInfo buffer; VkBufferView texel_buffer; @@ -41,39 +42,40 @@ public: void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); void AddSampledImage(VkImageView image_view, VkSampler sampler) { - payload.emplace_back(VkDescriptorImageInfo{ + *(payload_cursor++) = VkDescriptorImageInfo{ .sampler = sampler, .imageView = image_view, .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }); + }; } void AddImage(VkImageView image_view) { - payload.emplace_back(VkDescriptorImageInfo{ + *(payload_cursor++) = VkDescriptorImageInfo{ .sampler = VK_NULL_HANDLE, .imageView = image_view, .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }); + }; } - void AddBuffer(VkBuffer buffer, u64 offset, size_t size) { - payload.emplace_back(VkDescriptorBufferInfo{ + void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) { + *(payload_cursor++) = VkDescriptorBufferInfo{ .buffer = buffer, .offset = offset, .range = size, - }); + }; } void AddTexelBuffer(VkBufferView texel_buffer) { - payload.emplace_back(texel_buffer); + *(payload_cursor++) = texel_buffer; } private: const Device& device; VKScheduler& scheduler; + DescriptorUpdateEntry* payload_cursor = nullptr; const DescriptorUpdateEntry* upload_start = nullptr; - boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload; + std::array<DescriptorUpdateEntry, 0x10000> payload; }; } // namespace Vulkan diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 50f4e7d35..7728f600e 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -330,6 +330,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { case StoreType::Bits32: (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); break; + case StoreType::Unsigned16: case StoreType::Signed16: { Node address = GetAddress(0); Node memory = (this->*get_memory)(address); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 833fa2a39..c69681e8d 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -806,6 +806,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is const std::size_t type_coord_count = GetCoordCount(texture_type); const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; + const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI); // If enabled arrays index is always stored in the gpr8 field const u64 array_register = instr.gpr8.Value(); @@ -820,17 +821,23 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is std::vector<Node> coords; for (std::size_t i = 0; i < type_coord_count; ++i) { const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); - coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); + coords.push_back( + GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i)); } const Node array = is_array ? GetRegister(array_register) : nullptr; // When lod is used always is in gpr20 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); + std::vector<Node> aoffi; + if (aoffi_enabled) { + aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false); + } + Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{*sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}}; + MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } return values; |