summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp31
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.cpp10
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.h28
-rw-r--r--src/video_core/shader/decode/memory.cpp1
-rw-r--r--src/video_core/shader/decode/texture.cpp11
11 files changed, 74 insertions, 34 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 48d5c4a5e..1ae5f1d62 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -239,6 +239,7 @@ Device::Device() {
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
has_debugging_tool_attached = IsDebugToolAttached(extensions);
+ has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
@@ -275,6 +276,7 @@ Device::Device(std::nullptr_t) {
has_image_load_formatted = true;
has_texture_shadow_lod = true;
has_variable_aoffi = true;
+ has_depth_buffer_float = true;
}
bool Device::TestVariableAoffi() {
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index ee053776d..f24bd0c7b 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -122,6 +122,10 @@ public:
return use_driver_cache;
}
+ bool HasDepthBufferFloat() const {
+ return has_depth_buffer_float;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -150,6 +154,7 @@ private:
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
bool use_driver_cache{};
+ bool has_depth_buffer_float{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 418644108..4610fd160 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -889,7 +889,11 @@ void RasterizerOpenGL::SyncViewport() {
const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
const GLdouble far_depth = src.translate_z + src.scale_z;
- glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
+ if (device.HasDepthBufferFloat()) {
+ glDepthRangeIndexeddNV(static_cast<GLuint>(i), near_depth, far_depth);
+ } else {
+ glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
+ }
if (!GLAD_GL_NV_viewport_swizzle) {
continue;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 529570ff0..5cf7cd151 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -335,6 +335,10 @@ void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop
const VideoCore::DiskResourceLoadCallback& callback) {
disk_cache.BindTitleID(title_id);
const std::optional transferable = disk_cache.LoadTransferable();
+
+ LOG_INFO(Render_OpenGL, "Total Shader Count: {}",
+ transferable.has_value() ? transferable->size() : 0);
+
if (!transferable) {
return;
}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 848eedd66..668633e7b 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -201,10 +201,6 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer,
});
}
-void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
- update_descriptor_queue.AddBuffer(buffer, offset, size);
-}
-
void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
if (num_indices <= current_num_indices) {
return;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 041e6515c..982e92191 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -8,6 +8,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -16,7 +17,6 @@ namespace Vulkan {
class Device;
class VKDescriptorPool;
class VKScheduler;
-class VKUpdateDescriptorQueue;
class BufferCacheRuntime;
@@ -86,7 +86,9 @@ public:
}
private:
- void BindBuffer(VkBuffer buffer, u32 offset, u32 size);
+ void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
+ update_descriptor_queue.AddBuffer(buffer, offset, size);
+ }
void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 40e2e0d38..c6846d886 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1845,13 +1845,21 @@ private:
Expression TextureGather(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(!meta.aoffi.empty());
const Id coords = GetCoordinates(operation, Type::Float);
+
+ spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
+ std::vector<Id> operands;
Id texture{};
+
+ if (!meta.aoffi.empty()) {
+ mask = mask | spv::ImageOperandsMask::Offset;
+ operands.push_back(GetOffsetCoordinates(operation));
+ }
+
if (meta.sampler.is_shadow) {
texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords,
- AsFloat(Visit(meta.depth_compare)));
+ AsFloat(Visit(meta.depth_compare)), mask, operands);
} else {
u32 component_value = 0;
if (meta.component) {
@@ -1860,7 +1868,7 @@ private:
component_value = component->GetValue();
}
texture = OpImageGather(t_float4, GetTextureSampler(operation), coords,
- Constant(t_uint, component_value));
+ Constant(t_uint, component_value), mask, operands);
}
return GetTextureElement(operation, texture, Type::Float);
}
@@ -1928,13 +1936,22 @@ private:
const Id image = GetTextureImage(operation);
const Id coords = GetCoordinates(operation, Type::Int);
+
+ spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
+ std::vector<Id> operands;
Id fetch;
+
if (meta.lod && !meta.sampler.is_buffer) {
- fetch = OpImageFetch(t_float4, image, coords, spv::ImageOperandsMask::Lod,
- AsInt(Visit(meta.lod)));
- } else {
- fetch = OpImageFetch(t_float4, image, coords);
+ mask = mask | spv::ImageOperandsMask::Lod;
+ operands.push_back(AsInt(Visit(meta.lod)));
+ }
+
+ if (!meta.aoffi.empty()) {
+ mask = mask | spv::ImageOperandsMask::Offset;
+ operands.push_back(GetOffsetCoordinates(operation));
}
+
+ fetch = OpImageFetch(t_float4, image, coords, mask, operands);
return GetTextureElement(operation, fetch, Type::Float);
}
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index f99273c6a..dc45fdcb1 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -20,20 +20,20 @@ VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKSchedu
VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
void VKUpdateDescriptorQueue::TickFrame() {
- payload.clear();
+ payload_cursor = payload.data();
}
void VKUpdateDescriptorQueue::Acquire() {
// Minimum number of entries required.
// This is the maximum number of entries a single draw call migth use.
- static constexpr std::size_t MIN_ENTRIES = 0x400;
+ static constexpr size_t MIN_ENTRIES = 0x400;
- if (payload.size() + MIN_ENTRIES >= payload.max_size()) {
+ if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
- payload.clear();
+ payload_cursor = payload.data();
}
- upload_start = &*payload.end();
+ upload_start = payload_cursor;
}
void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index e214f7195..d35e77c44 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -4,8 +4,7 @@
#pragma once
-#include <variant>
-#include <boost/container/static_vector.hpp>
+#include <array>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -16,13 +15,15 @@ class Device;
class VKScheduler;
struct DescriptorUpdateEntry {
- DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
+ struct Empty {};
+ DescriptorUpdateEntry() = default;
+ DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
-
DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
union {
+ Empty empty{};
VkDescriptorImageInfo image;
VkDescriptorBufferInfo buffer;
VkBufferView texel_buffer;
@@ -41,39 +42,40 @@ public:
void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
- payload.emplace_back(VkDescriptorImageInfo{
+ *(payload_cursor++) = VkDescriptorImageInfo{
.sampler = sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- });
+ };
}
void AddImage(VkImageView image_view) {
- payload.emplace_back(VkDescriptorImageInfo{
+ *(payload_cursor++) = VkDescriptorImageInfo{
.sampler = VK_NULL_HANDLE,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- });
+ };
}
- void AddBuffer(VkBuffer buffer, u64 offset, size_t size) {
- payload.emplace_back(VkDescriptorBufferInfo{
+ void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
+ *(payload_cursor++) = VkDescriptorBufferInfo{
.buffer = buffer,
.offset = offset,
.range = size,
- });
+ };
}
void AddTexelBuffer(VkBufferView texel_buffer) {
- payload.emplace_back(texel_buffer);
+ *(payload_cursor++) = texel_buffer;
}
private:
const Device& device;
VKScheduler& scheduler;
+ DescriptorUpdateEntry* payload_cursor = nullptr;
const DescriptorUpdateEntry* upload_start = nullptr;
- boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
+ std::array<DescriptorUpdateEntry, 0x10000> payload;
};
} // namespace Vulkan
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 50f4e7d35..7728f600e 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -330,6 +330,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
case StoreType::Bits32:
(this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
break;
+ case StoreType::Unsigned16:
case StoreType::Signed16: {
Node address = GetAddress(0);
Node memory = (this->*get_memory)(address);
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 833fa2a39..c69681e8d 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -806,6 +806,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
const std::size_t type_coord_count = GetCoordCount(texture_type);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
+ const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI);
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
@@ -820,17 +821,23 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
std::vector<Node> coords;
for (std::size_t i = 0; i < type_coord_count; ++i) {
const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
- coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
+ coords.push_back(
+ GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i));
}
const Node array = is_array ? GetRegister(array_register) : nullptr;
// When lod is used always is in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
+ std::vector<Node> aoffi;
+ if (aoffi_enabled) {
+ aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false);
+ }
+
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{*sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}};
+ MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;