summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/core/hle/kernel/resource_limit.cpp6
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h1
-rw-r--r--src/video_core/engines/kepler_compute.cpp5
-rw-r--r--src/video_core/engines/kepler_compute.h2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp5
-rw-r--r--src/video_core/engines/maxwell_3d.h3
-rw-r--r--src/video_core/rasterizer_cache.cpp7
-rw-r--r--src/video_core/rasterizer_cache.h253
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h1
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp125
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h21
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp87
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h51
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp64
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h1
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp53
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h6
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp72
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h33
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp27
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp76
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h33
-rw-r--r--src/video_core/shader/decode/texture.cpp55
-rw-r--r--src/video_core/shader/node.h75
-rw-r--r--src/video_core/shader/node_helper.h2
-rw-r--r--src/video_core/shader/registry.cpp20
-rw-r--r--src/video_core/shader/registry.h35
-rw-r--r--src/video_core/shader/shader_ir.h14
-rw-r--r--src/video_core/shader/track.cpp78
-rw-r--r--src/video_core/shader_cache.h228
-rw-r--r--src/video_core/texture_cache/surface_base.cpp7
-rw-r--r--src/video_core/texture_cache/surface_base.h13
-rw-r--r--src/video_core/texture_cache/surface_params.cpp19
-rw-r--r--src/video_core/texture_cache/texture_cache.h119
39 files changed, 941 insertions, 671 deletions
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp
index d9beaa3a4..212e442f4 100644
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -24,13 +24,9 @@ bool ResourceLimit::Reserve(ResourceType resource, s64 amount, u64 timeout) {
const std::size_t index{ResourceTypeToIndex(resource)};
s64 new_value = current[index] + amount;
- while (new_value > limit[index] && available[index] + amount <= limit[index]) {
+ if (new_value > limit[index] && available[index] + amount <= limit[index]) {
// TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout
new_value = current[index] + amount;
-
- if (timeout >= 0) {
- break;
- }
}
if (new_value <= limit[index]) {
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 2bf8d68ce..39d5d8401 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -49,8 +49,6 @@ add_library(video_core STATIC
query_cache.h
rasterizer_accelerated.cpp
rasterizer_accelerated.h
- rasterizer_cache.cpp
- rasterizer_cache.h
rasterizer_interface.h
renderer_base.cpp
renderer_base.h
@@ -93,6 +91,7 @@ add_library(video_core STATIC
renderer_opengl/utils.h
sampler_cache.cpp
sampler_cache.h
+ shader_cache.h
shader/decode/arithmetic.cpp
shader/decode/arithmetic_immediate.cpp
shader/decode/bfe.cpp
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index ebe139504..f46e81bb7 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -93,6 +93,7 @@ public:
virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const = 0;
+ virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
virtual u32 GetBoundBuffer() const = 0;
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index f6237fc6a..a82b06a38 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -92,8 +92,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
ASSERT(stage == ShaderType::Compute);
const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
+ return AccessSampler(memory_manager.Read<u32>(tex_info_address));
+}
- const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
+SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
+ const Texture::TextureHandle tex_handle{handle};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 18ceedfaf..b7f668d88 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -219,6 +219,8 @@ public:
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const override;
+ SamplerDescriptor AccessSampler(u32 handle) const override;
+
u32 GetBoundBuffer() const override {
return regs.tex_cb_index;
}
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index e46b153f9..ea3c8a963 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -740,8 +740,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
const auto& tex_info_buffer = shader.const_buffers[const_buffer];
const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
+ return AccessSampler(memory_manager.Read<u32>(tex_info_address));
+}
- const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
+SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
+ const Texture::TextureHandle tex_handle{handle};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index b827b112f..d5fe25065 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -598,6 +598,7 @@ public:
BitField<4, 3, u32> block_height;
BitField<8, 3, u32> block_depth;
BitField<12, 1, InvMemoryLayout> type;
+ BitField<16, 1, u32> is_3d;
} memory_layout;
union {
BitField<0, 16, u32> layers;
@@ -1403,6 +1404,8 @@ public:
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const override;
+ SamplerDescriptor AccessSampler(u32 handle) const override;
+
u32 GetBoundBuffer() const override {
return regs.tex_cb_index;
}
diff --git a/src/video_core/rasterizer_cache.cpp b/src/video_core/rasterizer_cache.cpp
deleted file mode 100644
index 093b2cdf4..000000000
--- a/src/video_core/rasterizer_cache.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "video_core/rasterizer_cache.h"
-
-RasterizerCacheObject::~RasterizerCacheObject() = default;
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
deleted file mode 100644
index 096ee337c..000000000
--- a/src/video_core/rasterizer_cache.h
+++ /dev/null
@@ -1,253 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <mutex>
-#include <set>
-#include <unordered_map>
-
-#include <boost/icl/interval_map.hpp>
-#include <boost/range/iterator_range_core.hpp>
-
-#include "common/common_types.h"
-#include "core/settings.h"
-#include "video_core/gpu.h"
-#include "video_core/rasterizer_interface.h"
-
-class RasterizerCacheObject {
-public:
- explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
-
- virtual ~RasterizerCacheObject();
-
- VAddr GetCpuAddr() const {
- return cpu_addr;
- }
-
- /// Gets the size of the shader in guest memory, required for cache management
- virtual std::size_t GetSizeInBytes() const = 0;
-
- /// Sets whether the cached object should be considered registered
- void SetIsRegistered(bool registered) {
- is_registered = registered;
- }
-
- /// Returns true if the cached object is registered
- bool IsRegistered() const {
- return is_registered;
- }
-
- /// Returns true if the cached object is dirty
- bool IsDirty() const {
- return is_dirty;
- }
-
- /// Returns ticks from when this cached object was last modified
- u64 GetLastModifiedTicks() const {
- return last_modified_ticks;
- }
-
- /// Marks an object as recently modified, used to specify whether it is clean or dirty
- template <class T>
- void MarkAsModified(bool dirty, T& cache) {
- is_dirty = dirty;
- last_modified_ticks = cache.GetModifiedTicks();
- }
-
- void SetMemoryMarked(bool is_memory_marked_) {
- is_memory_marked = is_memory_marked_;
- }
-
- bool IsMemoryMarked() const {
- return is_memory_marked;
- }
-
- void SetSyncPending(bool is_sync_pending_) {
- is_sync_pending = is_sync_pending_;
- }
-
- bool IsSyncPending() const {
- return is_sync_pending;
- }
-
-private:
- bool is_registered{}; ///< Whether the object is currently registered with the cache
- bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
- bool is_memory_marked{}; ///< Whether the object is marking rasterizer memory.
- bool is_sync_pending{}; ///< Whether the object is pending deletion.
- u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
- VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
-};
-
-template <class T>
-class RasterizerCache : NonCopyable {
- friend class RasterizerCacheObject;
-
-public:
- explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
-
- /// Write any cached resources overlapping the specified region back to memory
- void FlushRegion(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
-
- const auto& objects{GetSortedObjectsFromRegion(addr, size)};
- for (auto& object : objects) {
- FlushObject(object);
- }
- }
-
- /// Mark the specified region as being invalidated
- void InvalidateRegion(VAddr addr, u64 size) {
- std::lock_guard lock{mutex};
-
- const auto& objects{GetSortedObjectsFromRegion(addr, size)};
- for (auto& object : objects) {
- if (!object->IsRegistered()) {
- // Skip duplicates
- continue;
- }
- Unregister(object);
- }
- }
-
- void OnCPUWrite(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
-
- for (const auto& object : GetSortedObjectsFromRegion(addr, size)) {
- if (object->IsRegistered()) {
- UnmarkMemory(object);
- object->SetSyncPending(true);
- marked_for_unregister.emplace_back(object);
- }
- }
- }
-
- void SyncGuestHost() {
- std::lock_guard lock{mutex};
-
- for (const auto& object : marked_for_unregister) {
- if (object->IsRegistered()) {
- object->SetSyncPending(false);
- Unregister(object);
- }
- }
- marked_for_unregister.clear();
- }
-
- /// Invalidates everything in the cache
- void InvalidateAll() {
- std::lock_guard lock{mutex};
-
- while (interval_cache.begin() != interval_cache.end()) {
- Unregister(*interval_cache.begin()->second.begin());
- }
- }
-
-protected:
- /// Tries to get an object from the cache with the specified cache address
- T TryGet(VAddr addr) const {
- const auto iter = map_cache.find(addr);
- if (iter != map_cache.end())
- return iter->second;
- return nullptr;
- }
-
- /// Register an object into the cache
- virtual void Register(const T& object) {
- std::lock_guard lock{mutex};
-
- object->SetIsRegistered(true);
- interval_cache.add({GetInterval(object), ObjectSet{object}});
- map_cache.insert({object->GetCpuAddr(), object});
- rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
- object->SetMemoryMarked(true);
- }
-
- /// Unregisters an object from the cache
- virtual void Unregister(const T& object) {
- std::lock_guard lock{mutex};
-
- UnmarkMemory(object);
- object->SetIsRegistered(false);
- if (object->IsSyncPending()) {
- marked_for_unregister.remove(object);
- object->SetSyncPending(false);
- }
- const VAddr addr = object->GetCpuAddr();
- interval_cache.subtract({GetInterval(object), ObjectSet{object}});
- map_cache.erase(addr);
- }
-
- void UnmarkMemory(const T& object) {
- if (!object->IsMemoryMarked()) {
- return;
- }
- rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
- object->SetMemoryMarked(false);
- }
-
- /// Returns a ticks counter used for tracking when cached objects were last modified
- u64 GetModifiedTicks() {
- std::lock_guard lock{mutex};
-
- return ++modified_ticks;
- }
-
- virtual void FlushObjectInner(const T& object) = 0;
-
- /// Flushes the specified object, updating appropriate cache state as needed
- void FlushObject(const T& object) {
- std::lock_guard lock{mutex};
-
- if (!object->IsDirty()) {
- return;
- }
- FlushObjectInner(object);
- object->MarkAsModified(false, *this);
- }
-
- std::recursive_mutex mutex;
-
-private:
- /// Returns a list of cached objects from the specified memory region, ordered by access time
- std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
- if (size == 0) {
- return {};
- }
-
- std::vector<T> objects;
- const ObjectInterval interval{addr, addr + size};
- for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) {
- for (auto& cached_object : pair.second) {
- if (!cached_object) {
- continue;
- }
- objects.push_back(cached_object);
- }
- }
-
- std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool {
- return a->GetLastModifiedTicks() < b->GetLastModifiedTicks();
- });
-
- return objects;
- }
-
- using ObjectSet = std::set<T>;
- using ObjectCache = std::unordered_map<VAddr, T>;
- using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
- using ObjectInterval = typename IntervalCache::interval_type;
-
- static auto GetInterval(const T& object) {
- return ObjectInterval::right_open(object->GetCpuAddr(),
- object->GetCpuAddr() + object->GetSizeInBytes());
- }
-
- ObjectCache map_cache;
- IntervalCache interval_cache; ///< Cache of objects
- u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
- VideoCore::RasterizerInterface& rasterizer;
- std::list<T> marked_for_unregister;
-};
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a9e86cfc7..679b9b1d7 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -10,7 +10,6 @@
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index a14641b97..890fc6c63 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -214,7 +214,8 @@ Device::Device()
has_precise_bug = TestPreciseBug();
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
- GLAD_GL_NV_compute_program5;
+ GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
+ GLAD_GL_NV_transform_feedback2;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 5673e30b3..2d6c11320 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -30,6 +30,7 @@
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core/shader_cache.h"
namespace OpenGL {
@@ -65,10 +66,22 @@ constexpr std::size_t NumSupportedVertexAttributes = 16;
template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
ShaderType shader_type, std::size_t index = 0) {
+ if constexpr (std::is_same_v<Entry, SamplerEntry>) {
+ if (entry.is_separated) {
+ const u32 buffer_1 = entry.buffer;
+ const u32 buffer_2 = entry.secondary_buffer;
+ const u32 offset_1 = entry.offset;
+ const u32 offset_2 = entry.secondary_offset;
+ const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
+ const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
+ return engine.GetTextureInfo(handle_1 | handle_2);
+ }
+ }
if (entry.is_bindless) {
- const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
- return engine.GetTextureInfo(tex_handle);
+ const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
+ return engine.GetTextureInfo(handle);
}
+
const auto& gpu_profile = engine.AccessGuestDriverProfile();
const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
@@ -93,6 +106,34 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
return buffer.size;
}
+/// Translates hardware transform feedback indices
+/// @param location Hardware location
+/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
+/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
+std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
+ const u8 index = location / 4;
+ if (index >= 8 && index <= 39) {
+ return {GL_GENERIC_ATTRIB_NV, index - 8};
+ }
+ if (index >= 48 && index <= 55) {
+ return {GL_TEXTURE_COORD_NV, index - 48};
+ }
+ switch (index) {
+ case 7:
+ return {GL_POSITION, 0};
+ case 40:
+ return {GL_PRIMARY_COLOR_NV, 0};
+ case 41:
+ return {GL_SECONDARY_COLOR_NV, 0};
+ case 42:
+ return {GL_BACK_PRIMARY_COLOR_NV, 0};
+ case 43:
+ return {GL_BACK_SECONDARY_COLOR_NV, 0};
+ }
+ UNIMPLEMENTED_MSG("index={}", static_cast<int>(index));
+ return {GL_POSITION, 0};
+}
+
void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
@@ -282,7 +323,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
continue;
}
- Shader shader{shader_cache.GetStageProgram(program)};
+ Shader* const shader = shader_cache.GetStageProgram(program);
if (device.UseAssemblyShaders()) {
// Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
@@ -851,7 +892,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return true;
}
-void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
static constexpr std::array PARAMETER_LUT = {
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
@@ -881,7 +922,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
}
}
-void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
const auto& entries = kernel->GetEntries();
@@ -950,7 +991,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
}
}
-void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
auto& gpu{system.GPU()};
auto& memory_manager{gpu.MemoryManager()};
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
@@ -965,7 +1006,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
}
}
-void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
auto& gpu{system.GPU()};
auto& memory_manager{gpu.MemoryManager()};
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
@@ -988,7 +1029,7 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
static_cast<GLsizeiptr>(size));
}
-void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).sampler;
@@ -1001,7 +1042,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
}
}
-void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
@@ -1030,7 +1071,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
}
}
-void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).image;
for (const auto& entry : shader->GetEntries().images) {
@@ -1040,7 +1081,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
}
}
-void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
+void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
for (const auto& entry : shader->GetEntries().images) {
@@ -1556,12 +1597,70 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
}
+void RasterizerOpenGL::SyncTransformFeedback() {
+ // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
+ // when this is required.
+ const auto& regs = system.GPU().Maxwell3D().regs;
+
+ static constexpr std::size_t STRIDE = 3;
+ std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
+ std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
+
+ GLint* cursor = attribs.data();
+ GLint* current_stream = streams.data();
+
+ for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
+ const auto& layout = regs.tfb_layouts[feedback];
+ UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
+ if (layout.varying_count == 0) {
+ continue;
+ }
+
+ *current_stream = static_cast<GLint>(feedback);
+ if (current_stream != streams.data()) {
+ // When stepping one stream, push the expected token
+ cursor[0] = GL_NEXT_BUFFER_NV;
+ cursor[1] = 0;
+ cursor[2] = 0;
+ cursor += STRIDE;
+ }
+ ++current_stream;
+
+ const auto& locations = regs.tfb_varying_locs[feedback];
+ std::optional<u8> current_index;
+ for (u32 offset = 0; offset < layout.varying_count; ++offset) {
+ const u8 location = locations[offset];
+ const u8 index = location / 4;
+
+ if (current_index == index) {
+ // Increase number of components of the previous attachment
+ ++cursor[-2];
+ continue;
+ }
+ current_index = index;
+
+ std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
+ cursor[1] = 1;
+ cursor += STRIDE;
+ }
+ }
+
+ const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
+ const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
+ glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
+ GL_INTERLEAVED_ATTRIBS);
+}
+
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
const auto& regs = system.GPU().Maxwell3D().regs;
if (regs.tfb_enabled == 0) {
return;
}
+ if (device.UseAssemblyShaders()) {
+ SyncTransformFeedback();
+ }
+
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
@@ -1588,6 +1687,10 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
static_cast<GLsizeiptr>(size));
}
+ // We may have to call BeginTransformFeedbackNV here since they seem to call different
+ // implementations on Nvidia's driver (the pointer is different) but we are using
+ // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB
+ // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works.
glBeginTransformFeedback(GL_POINTS);
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index f5dc56a0e..4f082592f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -19,7 +19,6 @@
#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_accelerated.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
@@ -100,10 +99,10 @@ private:
void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
/// Configures the current constbuffers to use for the draw command.
- void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
+ void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
/// Configures the current constbuffers to use for the kernel invocation.
- void SetupComputeConstBuffers(const Shader& kernel);
+ void SetupComputeConstBuffers(Shader* kernel);
/// Configures a constant buffer.
void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
@@ -111,30 +110,30 @@ private:
std::size_t unified_offset);
/// Configures the current global memory entries to use for the draw command.
- void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
+ void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
/// Configures the current global memory entries to use for the kernel invocation.
- void SetupComputeGlobalMemory(const Shader& kernel);
+ void SetupComputeGlobalMemory(Shader* kernel);
/// Configures a constant buffer.
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
std::size_t size);
/// Configures the current textures to use for the draw command.
- void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
+ void SetupDrawTextures(std::size_t stage_index, Shader* shader);
/// Configures the textures used in a compute shader.
- void SetupComputeTextures(const Shader& kernel);
+ void SetupComputeTextures(Shader* kernel);
/// Configures a texture.
void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
const SamplerEntry& entry);
/// Configures images in a graphics shader.
- void SetupDrawImages(std::size_t stage_index, const Shader& shader);
+ void SetupDrawImages(std::size_t stage_index, Shader* shader);
/// Configures images in a compute shader.
- void SetupComputeImages(const Shader& shader);
+ void SetupComputeImages(Shader* shader);
/// Configures an image.
void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
@@ -202,6 +201,10 @@ private:
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
+ /// Syncs transform feedback state to match guest state
+ /// @note Only valid on assembly shaders
+ void SyncTransformFeedback();
+
/// Begin a transform feedback
void BeginTransformFeedback(GLenum primitive_mode);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index a991ca64a..c28486b1d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -29,6 +29,7 @@
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
namespace OpenGL {
@@ -194,12 +195,9 @@ std::unordered_set<GLenum> GetSupportedFormats() {
} // Anonymous namespace
-CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
- std::shared_ptr<VideoCommon::Shader::Registry> registry,
- ShaderEntries entries, ProgramSharedPtr program_)
- : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
- size_in_bytes{size_in_bytes}, program{std::move(program_)} {
- // Assign either the assembly program or source program. We can't have both.
+Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
+ ProgramSharedPtr program_)
+ : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} {
handle = program->assembly_program.handle;
if (handle == 0) {
handle = program->source_program.handle;
@@ -207,16 +205,16 @@ CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
ASSERT(handle != 0);
}
-CachedShader::~CachedShader() = default;
+Shader::~Shader() = default;
-GLuint CachedShader::GetHandle() const {
+GLuint Shader::GetHandle() const {
DEBUG_ASSERT(registry->IsConsistent());
return handle;
}
-Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
- Maxwell::ShaderProgram program_type, ProgramCode code,
- ProgramCode code_b) {
+std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params,
+ Maxwell::ShaderProgram program_type,
+ ProgramCode code, ProgramCode code_b) {
const auto shader_type = GetShaderType(program_type);
const std::size_t size_in_bytes = code.size() * sizeof(u64);
@@ -241,12 +239,12 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
- return std::shared_ptr<CachedShader>(
- new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
- MakeEntries(params.device, ir, shader_type), std::move(program)));
+ return std::unique_ptr<Shader>(new Shader(
+ std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program)));
}
-Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
+std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
+ ProgramCode code) {
const std::size_t size_in_bytes = code.size() * sizeof(u64);
auto& engine = params.system.GPU().KeplerCompute();
@@ -266,23 +264,23 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
- return std::shared_ptr<CachedShader>(
- new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
- MakeEntries(params.device, ir, ShaderType::Compute), std::move(program)));
+ return std::unique_ptr<Shader>(new Shader(std::move(registry),
+ MakeEntries(params.device, ir, ShaderType::Compute),
+ std::move(program)));
}
-Shader CachedShader::CreateFromCache(const ShaderParameters& params,
- const PrecompiledShader& precompiled_shader,
- std::size_t size_in_bytes) {
- return std::shared_ptr<CachedShader>(
- new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
- precompiled_shader.entries, precompiled_shader.program));
+std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
+ const PrecompiledShader& precompiled_shader) {
+ return std::unique_ptr<Shader>(new Shader(
+ precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
Core::Frontend::EmuWindow& emu_window, const Device& device)
- : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device},
- disk_cache{system} {}
+ : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system},
+ emu_window{emu_window}, device{device}, disk_cache{system} {}
+
+ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
@@ -436,7 +434,7 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
return program;
}
-Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
+Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
return last_shaders[static_cast<std::size_t>(program)];
}
@@ -446,8 +444,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
// Look up shader in the cache based on address
const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
- Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
- if (shader) {
+ if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
return last_shaders[static_cast<std::size_t>(program)] = shader;
}
@@ -468,30 +465,29 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const ShaderParameters params{system, disk_cache, device,
*cpu_addr, host_ptr, unique_identifier};
+ std::unique_ptr<Shader> shader;
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {
- shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
- std::move(code_b));
+ shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b));
} else {
- const std::size_t size_in_bytes = code.size() * sizeof(u64);
- shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
+ shader = Shader::CreateFromCache(params, found->second);
}
+ Shader* const result = shader.get();
if (cpu_addr) {
- Register(shader);
+ Register(std::move(shader), *cpu_addr, code.size() * sizeof(u64));
} else {
- null_shader = shader;
+ null_shader = std::move(shader);
}
- return last_shaders[static_cast<std::size_t>(program)] = shader;
+ return last_shaders[static_cast<std::size_t>(program)] = result;
}
-Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
+Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
auto& memory_manager{system.GPU().MemoryManager()};
const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
- auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
- if (kernel) {
+ if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
return kernel;
}
@@ -503,20 +499,21 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
const ShaderParameters params{system, disk_cache, device,
*cpu_addr, host_ptr, unique_identifier};
+ std::unique_ptr<Shader> kernel;
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {
- kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
+ kernel = Shader::CreateKernelFromMemory(params, std::move(code));
} else {
- const std::size_t size_in_bytes = code.size() * sizeof(u64);
- kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
+ kernel = Shader::CreateFromCache(params, found->second);
}
+ Shader* const result = kernel.get();
if (cpu_addr) {
- Register(kernel);
+ Register(std::move(kernel), *cpu_addr, code.size() * sizeof(u64));
} else {
- null_kernel = kernel;
+ null_kernel = std::move(kernel);
}
- return kernel;
+ return result;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index b2ae8d7f9..6848f1388 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -18,12 +18,12 @@
#include "common/common_types.h"
#include "video_core/engines/shader_type.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
namespace Core {
class System;
@@ -35,12 +35,10 @@ class EmuWindow;
namespace OpenGL {
-class CachedShader;
class Device;
class RasterizerOpenGL;
struct UnspecializedShader;
-using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct ProgramHandle {
@@ -64,62 +62,53 @@ struct ShaderParameters {
u64 unique_identifier;
};
-class CachedShader final : public RasterizerCacheObject {
+class Shader final {
public:
- ~CachedShader();
+ ~Shader();
/// Gets the GL program handle for the shader
GLuint GetHandle() const;
- /// Returns the size in bytes of the shader
- std::size_t GetSizeInBytes() const override {
- return size_in_bytes;
- }
-
/// Gets the shader entries for the shader
const ShaderEntries& GetEntries() const {
return entries;
}
- static Shader CreateStageFromMemory(const ShaderParameters& params,
- Maxwell::ShaderProgram program_type,
- ProgramCode program_code, ProgramCode program_code_b);
- static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
+ static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params,
+ Maxwell::ShaderProgram program_type,
+ ProgramCode program_code,
+ ProgramCode program_code_b);
+ static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
+ ProgramCode code);
- static Shader CreateFromCache(const ShaderParameters& params,
- const PrecompiledShader& precompiled_shader,
- std::size_t size_in_bytes);
+ static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
+ const PrecompiledShader& precompiled_shader);
private:
- explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
- std::shared_ptr<VideoCommon::Shader::Registry> registry,
- ShaderEntries entries, ProgramSharedPtr program);
+ explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
+ ProgramSharedPtr program);
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
- std::size_t size_in_bytes = 0;
ProgramSharedPtr program;
GLuint handle = 0;
};
-class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
+class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
public:
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
Core::Frontend::EmuWindow& emu_window, const Device& device);
+ ~ShaderCacheOpenGL() override;
/// Loads disk cache for the current game
void LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
/// Gets the current specified shader stage program
- Shader GetStageProgram(Maxwell::ShaderProgram program);
+ Shader* GetStageProgram(Maxwell::ShaderProgram program);
/// Gets a compute kernel in the passed address
- Shader GetComputeKernel(GPUVAddr code_addr);
-
-protected:
- // We do not have to flush this cache as things in it are never modified by us.
- void FlushObjectInner(const Shader& object) override {}
+ Shader* GetComputeKernel(GPUVAddr code_addr);
private:
ProgramSharedPtr GeneratePrecompiledProgram(
@@ -132,10 +121,10 @@ private:
ShaderDiskCacheOpenGL disk_cache;
std::unordered_map<u64, PrecompiledShader> runtime_cache;
- Shader null_shader{};
- Shader null_kernel{};
+ std::unique_ptr<Shader> null_shader;
+ std::unique_ptr<Shader> null_kernel;
- std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+ std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 9e95a122b..653c3f2f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap;
namespace {
+using VideoCommon::Shader::SeparateSamplerKey;
+
using ShaderCacheVersionHash = std::array<u8, 64>;
struct ConstBufferKey {
@@ -37,18 +39,26 @@ struct ConstBufferKey {
u32 value = 0;
};
-struct BoundSamplerKey {
+struct BoundSamplerEntry {
u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
-struct BindlessSamplerKey {
+struct SeparateSamplerEntry {
+ u32 cbuf1 = 0;
+ u32 cbuf2 = 0;
+ u32 offset1 = 0;
+ u32 offset2 = 0;
+ Tegra::Engines::SamplerDescriptor sampler;
+};
+
+struct BindlessSamplerEntry {
u32 cbuf = 0;
u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
-constexpr u32 NativeVersion = 20;
+constexpr u32 NativeVersion = 21;
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
@@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
u32 texture_handler_size_value;
u32 num_keys;
u32 num_bound_samplers;
+ u32 num_separate_samplers;
u32 num_bindless_samplers;
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
file.ReadArray(&texture_handler_size_value, 1) != 1 ||
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
+ file.ReadArray(&num_separate_samplers, 1) != 1 ||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
return false;
}
@@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
}
std::vector<ConstBufferKey> flat_keys(num_keys);
- std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
- std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
+ std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
+ std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
+ std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
flat_bound_samplers.size() ||
+ file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
+ flat_separate_samplers.size() ||
file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
flat_bindless_samplers.size()) {
return false;
}
- for (const auto& key : flat_keys) {
- keys.insert({{key.cbuf, key.offset}, key.value});
+ for (const auto& entry : flat_keys) {
+ keys.insert({{entry.cbuf, entry.offset}, entry.value});
}
- for (const auto& key : flat_bound_samplers) {
- bound_samplers.emplace(key.offset, key.sampler);
+ for (const auto& entry : flat_bound_samplers) {
+ bound_samplers.emplace(entry.offset, entry.sampler);
}
- for (const auto& key : flat_bindless_samplers) {
- bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
+ for (const auto& entry : flat_separate_samplers) {
+ SeparateSamplerKey key;
+ key.buffers = {entry.cbuf1, entry.cbuf2};
+ key.offsets = {entry.offset1, entry.offset2};
+ separate_samplers.emplace(key, entry.sampler);
+ }
+ for (const auto& entry : flat_bindless_samplers) {
+ bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
}
return true;
@@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
+ file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
return false;
}
@@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
}
- std::vector<BoundSamplerKey> flat_bound_samplers;
+ std::vector<BoundSamplerEntry> flat_bound_samplers;
flat_bound_samplers.reserve(bound_samplers.size());
for (const auto& [address, sampler] : bound_samplers) {
- flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
+ flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
+ }
+
+ std::vector<SeparateSamplerEntry> flat_separate_samplers;
+ flat_separate_samplers.reserve(separate_samplers.size());
+ for (const auto& [key, sampler] : separate_samplers) {
+ SeparateSamplerEntry entry;
+ std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
+ std::tie(entry.offset1, entry.offset2) = key.offsets;
+ entry.sampler = sampler;
+ flat_separate_samplers.push_back(entry);
}
- std::vector<BindlessSamplerKey> flat_bindless_samplers;
+ std::vector<BindlessSamplerEntry> flat_bindless_samplers;
flat_bindless_samplers.reserve(bindless_samplers.size());
for (const auto& [address, sampler] : bindless_samplers) {
flat_bindless_samplers.push_back(
- BindlessSamplerKey{address.first, address.second, sampler});
+ BindlessSamplerEntry{address.first, address.second, sampler});
}
return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
flat_bound_samplers.size() &&
+ file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
+ flat_separate_samplers.size() &&
file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
flat_bindless_samplers.size();
}
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index d5be52e40..a79cef0e9 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry {
VideoCommon::Shader::ComputeInfo compute_info;
VideoCommon::Shader::KeyMap keys;
VideoCommon::Shader::BoundSamplerMap bound_samplers;
+ VideoCommon::Shader::SeparateSamplerMap separate_samplers;
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 57db5a08b..61505879b 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -263,9 +263,14 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param
target = GetTextureTarget(params.target);
texture = CreateTexture(params, target, internal_format, texture_buffer);
DecorateSurfaceName();
- main_view = CreateViewInner(
- ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels),
- true);
+
+ u32 num_layers = 1;
+ if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
+ num_layers = params.depth;
+ }
+
+ main_view =
+ CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
}
CachedSurface::~CachedSurface() = default;
@@ -413,20 +418,23 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
CachedSurfaceView::~CachedSurfaceView() = default;
-void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
+void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const {
ASSERT(params.num_levels == 1);
+ if (params.target == SurfaceTarget::Texture3D) {
+ if (params.num_layers > 1) {
+ ASSERT(params.base_layer == 0);
+ glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level);
+ } else {
+ glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle,
+ params.base_level, params.base_layer);
+ }
+ return;
+ }
+
if (params.num_layers > 1) {
- // Layered framebuffer attachments
UNIMPLEMENTED_IF(params.base_layer != 0);
-
- switch (params.target) {
- case SurfaceTarget::Texture2DArray:
- glFramebufferTexture(target, attachment, GetTexture(), 0);
- break;
- default:
- UNIMPLEMENTED();
- }
+ glFramebufferTexture(fb_target, attachment, GetTexture(), 0);
return;
}
@@ -434,16 +442,16 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
const GLuint texture = surface.GetTexture();
switch (surface.GetSurfaceParams().target) {
case SurfaceTarget::Texture1D:
- glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
+ glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture2D:
- glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level);
+ glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubeArray:
- glFramebufferTextureLayer(target, attachment, texture, params.base_level,
+ glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level,
params.base_layer);
break;
default:
@@ -500,8 +508,13 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
OGLTextureView texture_view;
texture_view.Create();
- glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level,
- params.num_levels, params.base_layer, params.num_layers);
+ if (target == GL_TEXTURE_3D) {
+ glTextureView(texture_view.handle, target, surface.texture.handle, format,
+ params.base_level, params.num_levels, 0, 1);
+ } else {
+ glTextureView(texture_view.handle, target, surface.texture.handle, format,
+ params.base_level, params.num_levels, params.base_layer, params.num_layers);
+ }
ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
return texture_view;
@@ -544,8 +557,8 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
const Tegra::Engines::Fermi2D::Config& copy_config) {
const auto& src_params{src_view->GetSurfaceParams()};
const auto& dst_params{dst_view->GetSurfaceParams()};
- UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
- UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
+ UNIMPLEMENTED_IF(src_params.depth != 1);
+ UNIMPLEMENTED_IF(dst_params.depth != 1);
state_tracker.NotifyScissor0();
state_tracker.NotifyFramebuffer();
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 8a2ac8603..bfc4ddf5d 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -80,8 +80,10 @@ public:
explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
~CachedSurfaceView();
- /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
- void Attach(GLenum attachment, GLenum target) const;
+ /// @brief Attaches this texture view to the currently bound fb_target framebuffer
+ /// @param attachment Attachment to bind textures to
+ /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
+ void Attach(GLenum attachment, GLenum fb_target) const;
GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
Tegra::Texture::SwizzleSource y_source,
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index e7952924a..6214fcbc3 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -751,11 +751,9 @@ void RendererOpenGL::RenderScreenshot() {
}
bool RendererOpenGL::Init() {
- if (GLAD_GL_KHR_debug) {
+ if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
- if (Settings::values.renderer_debug) {
- glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
- }
+ glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
glDebugMessageCallback(DebugHandler, nullptr);
}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index a54583e7d..65cb3c8ad 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -8,7 +8,6 @@
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index b8ccf164f..ea66e621e 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -27,6 +27,7 @@
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader/compiler_settings.h"
#include "video_core/shader/memory_util.h"
+#include "video_core/shader_cache.h"
namespace Vulkan {
@@ -132,19 +133,18 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
return std::memcmp(&rhs, this, sizeof *this) == 0;
}
-CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
- GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
- u32 main_offset)
- : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
+Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+ VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
+ : gpu_addr{gpu_addr}, program_code{std::move(program_code)},
registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
compiler_settings, registry},
entries{GenerateShaderEntries(shader_ir)} {}
-CachedShader::~CachedShader() = default;
+Shader::~Shader() = default;
-Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
- Core::System& system, Tegra::Engines::ShaderType stage) {
- if (stage == Tegra::Engines::ShaderType::Compute) {
+Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
+ Tegra::Engines::ShaderType stage) {
+ if (stage == ShaderType::Compute) {
return system.GPU().KeplerCompute();
} else {
return system.GPU().Maxwell3D();
@@ -156,16 +156,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache)
- : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
- descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
- renderpass_cache{renderpass_cache} {}
+ : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
+ scheduler{scheduler}, descriptor_pool{descriptor_pool},
+ update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
VKPipelineCache::~VKPipelineCache() = default;
-std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
+std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const auto& gpu = system.GPU().Maxwell3D();
- std::array<Shader, Maxwell::MaxShaderProgram> shaders;
+ std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -178,24 +178,28 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const GPUVAddr program_addr{GetShaderAddress(system, program)};
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
- auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
- if (!shader) {
+
+ Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
+ if (!result) {
const auto host_ptr{memory_manager.GetPointer(program_addr)};
// No shader found - create a new one
constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
- const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
+ const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
+
+ auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
+ stage_offset);
+ result = shader.get();
- shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
- std::move(code), stage_offset);
if (cpu_addr) {
- Register(shader);
+ Register(std::move(shader), *cpu_addr, size_in_bytes);
} else {
- null_shader = shader;
+ null_shader = std::move(shader);
}
}
- shaders[index] = std::move(shader);
+ shaders[index] = result;
}
return last_shaders = shaders;
}
@@ -236,19 +240,22 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
- auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
+ Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
if (!shader) {
// No shader found - create a new one
const auto host_ptr = memory_manager.GetPointer(program_addr);
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
- shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
- program_addr, *cpu_addr, std::move(code),
- KERNEL_MAIN_OFFSET);
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
+
+ auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
+ std::move(code), KERNEL_MAIN_OFFSET);
+ shader = shader_info.get();
+
if (cpu_addr) {
- Register(shader);
+ Register(std::move(shader_info), *cpu_addr, size_in_bytes);
} else {
- null_kernel = shader;
+ null_kernel = std::move(shader_info);
}
}
@@ -264,7 +271,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
return *entry;
}
-void VKPipelineCache::Unregister(const Shader& shader) {
+void VKPipelineCache::OnShaderRemoval(Shader* shader) {
bool finished = false;
const auto Finish = [&] {
// TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
@@ -296,8 +303,6 @@ void VKPipelineCache::Unregister(const Shader& shader) {
Finish();
it = compute_cache.erase(it);
}
-
- RasterizerCache::Unregister(shader);
}
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
@@ -332,12 +337,11 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
}
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
- const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
- const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
- ASSERT(shader);
+ const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+ Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
- const auto program_type = GetShaderType(program_enum);
+ const ShaderType program_type = GetShaderType(program_enum);
const auto& entries = shader->GetEntries();
program[stage] = {
Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 0b5796fef..0a36e5112 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -17,7 +17,6 @@
#include "common/common_types.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
@@ -26,6 +25,7 @@
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
namespace Core {
class System;
@@ -41,8 +41,6 @@ class VKFence;
class VKScheduler;
class VKUpdateDescriptorQueue;
-class CachedShader;
-using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct GraphicsPipelineCacheKey {
@@ -102,21 +100,16 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
namespace Vulkan {
-class CachedShader final : public RasterizerCacheObject {
+class Shader {
public:
- explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
- VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code,
- u32 main_offset);
- ~CachedShader();
+ explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+ VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
+ ~Shader();
GPUVAddr GetGpuAddr() const {
return gpu_addr;
}
- std::size_t GetSizeInBytes() const override {
- return program_code.size() * sizeof(u64);
- }
-
VideoCommon::Shader::ShaderIR& GetIR() {
return shader_ir;
}
@@ -144,25 +137,23 @@ private:
ShaderEntries entries;
};
-class VKPipelineCache final : public RasterizerCache<Shader> {
+class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
public:
explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache);
- ~VKPipelineCache();
+ ~VKPipelineCache() override;
- std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
+ std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
protected:
- void Unregister(const Shader& shader) override;
-
- void FlushObjectInner(const Shader& object) override {}
+ void OnShaderRemoval(Shader* shader) final;
private:
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
@@ -175,10 +166,10 @@ private:
VKUpdateDescriptorQueue& update_descriptor_queue;
VKRenderPassCache& renderpass_cache;
- Shader null_shader{};
- Shader null_kernel{};
+ std::unique_ptr<Shader> null_shader;
+ std::unique_ptr<Shader> null_kernel;
- std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+ std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
GraphicsPipelineCacheKey last_graphics_key;
VKGraphicsPipeline* last_graphics_pipeline = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index d86c46412..184b2238a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -38,6 +38,7 @@
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader_cache.h"
namespace Vulkan {
@@ -98,7 +99,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
}
std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
- const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
+ const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
for (std::size_t i = 0; i < std::size(addresses); ++i) {
addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
@@ -117,6 +118,17 @@ template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
std::size_t stage, std::size_t index = 0) {
const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
+ if constexpr (std::is_same_v<Entry, SamplerEntry>) {
+ if (entry.is_separated) {
+ const u32 buffer_1 = entry.buffer;
+ const u32 buffer_2 = entry.secondary_buffer;
+ const u32 offset_1 = entry.offset;
+ const u32 offset_2 = entry.secondary_offset;
+ const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1);
+ const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2);
+ return engine.GetTextureInfo(handle_1 | handle_2);
+ }
+ }
if (entry.is_bindless) {
const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
return engine.GetTextureInfo(tex_handle);
@@ -716,7 +728,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
if (!view) {
return false;
}
- key.views.push_back(view->GetHandle());
+ key.views.push_back(view->GetAttachment());
key.width = std::min(key.width, view->GetWidth());
key.height = std::min(key.height, view->GetHeight());
key.layers = std::min(key.layers, view->GetNumLayers());
@@ -776,12 +788,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
}
void RasterizerVulkan::SetupShaderDescriptors(
- const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
+ const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
texture_cache.GuardSamplers(true);
for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
// Skip VertexA stage
- const auto& shader = shaders[stage + 1];
+ Shader* const shader = shaders[stage + 1];
if (!shader) {
continue;
}
@@ -1137,8 +1149,8 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
auto view = texture_cache.GetTextureSurface(texture.tic, entry);
ASSERT(!view->IsBufferView());
- const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source,
- texture.tic.z_source, texture.tic.w_source);
+ const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source,
+ texture.tic.z_source, texture.tic.w_source);
const auto sampler = sampler_cache.GetSampler(texture.tsc);
update_descriptor_queue.AddSampledImage(sampler, image_view);
@@ -1164,7 +1176,8 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
UNIMPLEMENTED_IF(tic.IsBuffer());
- const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
+ const VkImageView image_view =
+ view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
update_descriptor_queue.AddImage(image_view);
const auto image_layout = update_descriptor_queue.GetLastImageLayout();
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 04be37a5e..c8c187606 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -168,7 +168,7 @@ private:
bool is_indexed, bool is_instanced);
/// Setup descriptors in the graphics pipeline.
- void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
+ void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
void SetupImageTransitions(Texceptions texceptions,
const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index ea487b770..430031665 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -167,6 +167,7 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
ci.extent = {params.width, params.height, 1};
break;
case SurfaceTarget::Texture3D:
+ ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
ci.extent = {params.width, params.height, params.depth};
break;
case SurfaceTarget::TextureBuffer:
@@ -176,6 +177,12 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
return ci;
}
+u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source,
+ Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) {
+ return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
+ (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
+}
+
} // Anonymous namespace
CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
@@ -203,9 +210,11 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
}
// TODO(Rodrigo): Move this to a virtual function.
- main_view = CreateViewInner(
- ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels),
- true);
+ u32 num_layers = 1;
+ if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
+ num_layers = params.depth;
+ }
+ main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels));
}
CachedSurface::~CachedSurface() = default;
@@ -253,12 +262,8 @@ void CachedSurface::DecorateSurfaceName() {
}
View CachedSurface::CreateView(const ViewParams& params) {
- return CreateViewInner(params, false);
-}
-
-View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) {
// TODO(Rodrigo): Add name decorations
- return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy);
+ return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params);
}
void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
@@ -342,18 +347,27 @@ VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
}
CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
- const ViewParams& params, bool is_proxy)
+ const ViewParams& params)
: VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
- base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level},
- num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target)
- : VK_IMAGE_VIEW_TYPE_1D} {}
+ base_level{params.base_level}, num_levels{params.num_levels},
+ image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} {
+ if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
+ base_layer = 0;
+ num_layers = 1;
+ base_slice = params.base_layer;
+ num_slices = params.num_layers;
+ } else {
+ base_layer = params.base_layer;
+ num_layers = params.num_layers;
+ }
+}
CachedSurfaceView::~CachedSurfaceView() = default;
-VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
- SwizzleSource z_source, SwizzleSource w_source) {
+VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source,
+ SwizzleSource z_source, SwizzleSource w_source) {
const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
if (last_image_view && last_swizzle == new_swizzle) {
return last_image_view;
@@ -399,6 +413,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
});
}
+ if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
+ ASSERT(base_slice == 0);
+ ASSERT(num_slices == params.depth);
+ }
+
VkImageViewCreateInfo ci;
ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
ci.pNext = nullptr;
@@ -417,6 +436,35 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
return last_image_view = *image_view;
}
+VkImageView CachedSurfaceView::GetAttachment() {
+ if (render_target) {
+ return *render_target;
+ }
+
+ VkImageViewCreateInfo ci;
+ ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
+ ci.pNext = nullptr;
+ ci.flags = 0;
+ ci.image = surface.GetImageHandle();
+ ci.format = surface.GetImage().GetFormat();
+ ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
+ VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
+ ci.subresourceRange.aspectMask = aspect_mask;
+ ci.subresourceRange.baseMipLevel = base_level;
+ ci.subresourceRange.levelCount = num_levels;
+ if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
+ ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
+ ci.subresourceRange.baseArrayLayer = base_slice;
+ ci.subresourceRange.layerCount = num_slices;
+ } else {
+ ci.viewType = image_view_type;
+ ci.subresourceRange.baseArrayLayer = base_layer;
+ ci.subresourceRange.layerCount = num_layers;
+ }
+ render_target = device.GetLogical().CreateImageView(ci);
+ return *render_target;
+}
+
VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
const VKDevice& device, VKResourceManager& resource_manager,
VKMemoryManager& memory_manager, VKScheduler& scheduler,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index f211ccb1e..807e26c8a 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -91,7 +91,6 @@ protected:
void DecorateSurfaceName();
View CreateView(const ViewParams& params) override;
- View CreateViewInner(const ViewParams& params, bool is_proxy);
private:
void UploadBuffer(const std::vector<u8>& staging_buffer);
@@ -120,23 +119,20 @@ private:
class CachedSurfaceView final : public VideoCommon::ViewBase {
public:
explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
- const ViewParams& params, bool is_proxy);
+ const ViewParams& params);
~CachedSurfaceView();
- VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source);
+ VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source,
+ Tegra::Texture::SwizzleSource y_source,
+ Tegra::Texture::SwizzleSource z_source,
+ Tegra::Texture::SwizzleSource w_source);
+
+ VkImageView GetAttachment();
bool IsSameSurface(const CachedSurfaceView& rhs) const {
return &surface == &rhs.surface;
}
- VkImageView GetHandle() {
- return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
- Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
- }
-
u32 GetWidth() const {
return params.GetMipWidth(base_level);
}
@@ -180,14 +176,6 @@ public:
}
private:
- static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source) {
- return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
- (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
- }
-
// Store a copy of these values to avoid double dereference when reading them
const SurfaceParams params;
const VkImage image;
@@ -196,15 +184,18 @@ private:
const VKDevice& device;
CachedSurface& surface;
- const u32 base_layer;
- const u32 num_layers;
const u32 base_level;
const u32 num_levels;
const VkImageViewType image_view_type;
+ u32 base_layer = 0;
+ u32 num_layers = 0;
+ u32 base_slice = 0;
+ u32 num_slices = 0;
VkImageView last_image_view = nullptr;
u32 last_swizzle = 0;
+ vk::ImageView render_target;
std::unordered_map<u32, vk::ImageView> view_cache;
};
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 8f0bb996e..29ebf65ba 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
return pc;
}
-ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
- std::optional<u32> buffer) {
+ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
+ SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
if (info.IsComplete()) {
return info;
}
- const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
- : registry.ObtainBoundSampler(offset);
if (!sampler) {
LOG_WARNING(HW_GPU, "Unknown sampler info");
info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
@@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
SamplerInfo sampler_info) {
- const auto offset = static_cast<u32>(sampler.index.Value());
- const auto info = GetSamplerInfo(sampler_info, offset);
+ const u32 offset = static_cast<u32>(sampler.index.Value());
+ const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
// If this sampler has already been used, return the existing mapping.
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
@@ -404,20 +402,19 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
const Node sampler_register = GetRegister(reg);
const auto [base_node, tracked_sampler_info] =
TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
- ASSERT(base_node != nullptr);
- if (base_node == nullptr) {
+ if (!base_node) {
+ UNREACHABLE();
return std::nullopt;
}
- if (const auto bindless_sampler_info =
- std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
- const u32 buffer = bindless_sampler_info->GetIndex();
- const u32 offset = bindless_sampler_info->GetOffset();
- info = GetSamplerInfo(info, offset, buffer);
+ if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
+ const u32 buffer = sampler_info->index;
+ const u32 offset = sampler_info->offset;
+ info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
// If this sampler has already been used, return the existing mapping.
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
- [buffer = buffer, offset = offset](const Sampler& entry) {
+ [buffer, offset](const Sampler& entry) {
return entry.buffer == buffer && entry.offset == offset;
});
if (it != used_samplers.end()) {
@@ -431,10 +428,32 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
*info.is_shadow, *info.is_buffer, false);
}
- if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
- const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
- index_var = GetCustomVariable(array_sampler_info->GetIndexVar());
- info = GetSamplerInfo(info, base_offset);
+ if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
+ const std::pair indices = sampler_info->indices;
+ const std::pair offsets = sampler_info->offsets;
+ info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
+
+ // Try to use an already created sampler if it exists
+ const auto it = std::find_if(
+ used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
+ return offsets == std::pair{entry.offset, entry.secondary_offset} &&
+ indices == std::pair{entry.buffer, entry.secondary_buffer};
+ });
+ if (it != used_samplers.end()) {
+ ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
+ it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
+ return *it;
+ }
+
+ // Otherwise create a new mapping for this sampler
+ const u32 next_index = static_cast<u32>(used_samplers.size());
+ return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
+ *info.is_shadow, *info.is_buffer);
+ }
+ if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
+ const u32 base_offset = sampler_info->base_offset / 4;
+ index_var = GetCustomVariable(sampler_info->bindless_var);
+ info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
// If this sampler has already been used, return the existing mapping.
const auto it = std::find_if(
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index c5e5165ff..8f230d57a 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -275,10 +275,11 @@ using Node = std::shared_ptr<NodeData>;
using Node4 = std::array<Node, 4>;
using NodeBlock = std::vector<Node>;
-class BindlessSamplerNode;
-class ArraySamplerNode;
+struct ArraySamplerNode;
+struct BindlessSamplerNode;
+struct SeparateSamplerNode;
-using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>;
+using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
using TrackSampler = std::shared_ptr<TrackSamplerData>;
struct Sampler {
@@ -288,63 +289,51 @@ struct Sampler {
: index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
is_buffer{is_buffer}, is_indexed{is_indexed} {}
+ /// Separate sampler constructor
+ constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
+ Tegra::Shader::TextureType type, bool is_array, bool is_shadow,
+ bool is_buffer)
+ : index{index}, offset{offsets.first}, secondary_offset{offsets.second},
+ buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array},
+ is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {}
+
/// Bindless samplers constructor
constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
: index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
- u32 index = 0; ///< Emulated index given for the this sampler.
- u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
- u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
- u32 size = 1; ///< Size of the sampler.
+ u32 index = 0; ///< Emulated index given for the this sampler.
+ u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
+ u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
+ u32 buffer = 0; ///< Buffer where the bindless sampler is read.
+ u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
+ u32 size = 1; ///< Size of the sampler.
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
- bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
- bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
- bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
- bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
- bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
+ bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
+ bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
+ bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
+ bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
+ bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
+ bool is_separated = false; ///< Whether the image and sampler is separated or not.
};
/// Represents a tracked bindless sampler into a direct const buffer
-class ArraySamplerNode final {
-public:
- explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
- : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
-
- constexpr u32 GetIndex() const {
- return index;
- }
-
- constexpr u32 GetBaseOffset() const {
- return base_offset;
- }
-
- constexpr u32 GetIndexVar() const {
- return bindless_var;
- }
-
-private:
+struct ArraySamplerNode {
u32 index;
u32 base_offset;
u32 bindless_var;
};
-/// Represents a tracked bindless sampler into a direct const buffer
-class BindlessSamplerNode final {
-public:
- explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
-
- constexpr u32 GetIndex() const {
- return index;
- }
-
- constexpr u32 GetOffset() const {
- return offset;
- }
+/// Represents a tracked separate sampler image pair that was folded statically
+struct SeparateSamplerNode {
+ std::pair<u32, u32> indices;
+ std::pair<u32, u32> offsets;
+};
-private:
+/// Represents a tracked bindless sampler into a direct const buffer
+struct BindlessSamplerNode {
u32 index;
u32 offset;
};
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 11231bbea..1e0886185 100644
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) {
template <typename T, typename... Args>
TrackSampler MakeTrackSampler(Args&&... args) {
static_assert(std::is_convertible_v<T, TrackSamplerData>);
- return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...));
+ return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
}
template <typename... Args>
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
index af70b3f35..cdf274e54 100644
--- a/src/video_core/shader/registry.cpp
+++ b/src/video_core/shader/registry.cpp
@@ -93,6 +93,26 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
return value;
}
+std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
+ std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
+ SeparateSamplerKey key;
+ key.buffers = buffers;
+ key.offsets = offsets;
+ const auto iter = separate_samplers.find(key);
+ if (iter != separate_samplers.end()) {
+ return iter->second;
+ }
+ if (!engine) {
+ return std::nullopt;
+ }
+
+ const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
+ const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
+ const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
+ separate_samplers.emplace(key, value);
+ return value;
+}
+
std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
u32 offset) {
const std::pair key = {buffer, offset};
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
index 0c80d35fd..231206765 100644
--- a/src/video_core/shader/registry.h
+++ b/src/video_core/shader/registry.h
@@ -19,8 +19,39 @@
namespace VideoCommon::Shader {
+struct SeparateSamplerKey {
+ std::pair<u32, u32> buffers;
+ std::pair<u32, u32> offsets;
+};
+
+} // namespace VideoCommon::Shader
+
+namespace std {
+
+template <>
+struct hash<VideoCommon::Shader::SeparateSamplerKey> {
+ std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
+ return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
+ key.offsets.second);
+ }
+};
+
+template <>
+struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
+ bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
+ const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
+ return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
+ }
+};
+
+} // namespace std
+
+namespace VideoCommon::Shader {
+
using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
+using SeparateSamplerMap =
+ std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
using BindlessSamplerMap =
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
@@ -73,6 +104,9 @@ public:
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
+ std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
+ std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
+
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
/// Inserts a key.
@@ -128,6 +162,7 @@ private:
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
KeyMap keys;
BoundSamplerMap bound_samplers;
+ SeparateSamplerMap separate_samplers;
BindlessSamplerMap bindless_samplers;
u32 bound_buffer;
GraphicsInfo graphics_info;
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 15ae152f2..3a98b2104 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -330,8 +330,8 @@ private:
OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
/// Queries the missing sampler info from the execution context.
- SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset,
- std::optional<u32> buffer = std::nullopt);
+ SamplerInfo GetSamplerInfo(SamplerInfo info,
+ std::optional<Tegra::Engines::SamplerDescriptor> sampler);
/// Accesses a texture sampler.
std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
@@ -409,8 +409,14 @@ private:
std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
- std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
- s64 cursor);
+ std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
+ s64 cursor);
+
+ std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
+ const OperationNode& operation,
+ Node gpr, Node base_offset,
+ Node tracked, const NodeBlock& code,
+ s64 cursor);
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index eb97bfd41..d5ed81442 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -14,6 +14,7 @@
namespace VideoCommon::Shader {
namespace {
+
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
OperationCode operation_code) {
for (; cursor >= 0; --cursor) {
@@ -63,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) {
operation->SetAmendIndex(amend_index);
return true;
- } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+ }
+ if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
conditional->SetAmendIndex(amend_index);
return true;
}
@@ -72,40 +74,27 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
} // Anonymous namespace
-std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
- s64 cursor) {
+std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
+ s64 cursor) {
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
+ const u32 cbuf_index = cbuf->GetIndex();
+
// Constant buffer found, test if it's an immediate
const auto& offset = cbuf->GetOffset();
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
- auto track =
- MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
+ auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
return {tracked, track};
}
if (const auto operation = std::get_if<OperationNode>(&*offset)) {
const u32 bound_buffer = registry.GetBoundBuffer();
- if (bound_buffer != cbuf->GetIndex()) {
+ if (bound_buffer != cbuf_index) {
return {};
}
- const auto pair = DecoupleIndirectRead(*operation);
- if (!pair) {
- return {};
+ if (const std::optional pair = DecoupleIndirectRead(*operation)) {
+ auto [gpr, base_offset] = *pair;
+ return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
+ code, cursor);
}
- auto [gpr, base_offset] = *pair;
- const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
- const auto& gpu_driver = registry.AccessGuestDriverProfile();
- const u32 bindless_cv = NewCustomVariable();
- Node op =
- Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
-
- const Node cv_node = GetCustomVariable(bindless_cv);
- Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
- const std::size_t amend_index = DeclareAmend(std::move(amend_op));
- AmendNodeCv(amend_index, code[cursor]);
- // TODO Implement Bindless Index custom variable
- auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
- offset_inm->GetValue(), bindless_cv);
- return {tracked, track};
}
return {};
}
@@ -122,10 +111,23 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
return TrackBindlessSampler(source, code, new_cursor);
}
if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
- for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
- if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor);
- std::get<0>(found)) {
- // Cbuf found in operand.
+ const OperationNode& op = *operation;
+
+ const OperationCode opcode = operation->GetCode();
+ if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
+ ASSERT(op.GetOperandsCount() == 2);
+ auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
+ auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
+ if (node_a && node_b) {
+ auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
+ std::pair{offset_a, offset_b});
+ return {tracked, std::move(track)};
+ }
+ }
+ std::size_t i = op.GetOperandsCount();
+ while (i--) {
+ if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) {
+ // Constant buffer found in operand.
return found;
}
}
@@ -139,6 +141,26 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
return {};
}
+std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
+ const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
+ const NodeBlock& code, s64 cursor) {
+ const auto offset_imm = std::get<ImmediateNode>(*base_offset);
+ const auto& gpu_driver = registry.AccessGuestDriverProfile();
+ const u32 bindless_cv = NewCustomVariable();
+ const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
+ Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
+
+ Node cv_node = GetCustomVariable(bindless_cv);
+ Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
+ const std::size_t amend_index = DeclareAmend(std::move(amend_op));
+ AmendNodeCv(amend_index, code[cursor]);
+
+ // TODO: Implement bindless index custom variable
+ auto track =
+ MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
+ return {tracked, track};
+}
+
std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
s64 cursor) const {
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
new file mode 100644
index 000000000..a23c23886
--- /dev/null
+++ b/src/video_core/shader_cache.h
@@ -0,0 +1,228 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+template <class T>
+class ShaderCache {
+ static constexpr u64 PAGE_SHIFT = 14;
+
+ struct Entry {
+ VAddr addr_start;
+ VAddr addr_end;
+ T* data;
+
+ bool is_memory_marked = true;
+
+ constexpr bool Overlaps(VAddr start, VAddr end) const noexcept {
+ return start < addr_end && addr_start < end;
+ }
+ };
+
+public:
+ virtual ~ShaderCache() = default;
+
+ /// @brief Removes shaders inside a given region
+ /// @note Checks for ranges
+ /// @param addr Start address of the invalidation
+ /// @param size Number of bytes of the invalidation
+ void InvalidateRegion(VAddr addr, std::size_t size) {
+ std::scoped_lock lock{invalidation_mutex};
+ InvalidatePagesInRegion(addr, size);
+ RemovePendingShaders();
+ }
+
+ /// @brief Unmarks a memory region as cached and marks it for removal
+ /// @param addr Start address of the CPU write operation
+ /// @param size Number of bytes of the CPU write operation
+ void OnCPUWrite(VAddr addr, std::size_t size) {
+ std::lock_guard lock{invalidation_mutex};
+ InvalidatePagesInRegion(addr, size);
+ }
+
+ /// @brief Flushes delayed removal operations
+ void SyncGuestHost() {
+ std::scoped_lock lock{invalidation_mutex};
+ RemovePendingShaders();
+ }
+
+ /// @brief Tries to obtain a cached shader starting in a given address
+ /// @note Doesn't check for ranges, the given address has to be the start of the shader
+ /// @param addr Start address of the shader, this doesn't cache for region
+ /// @return Pointer to a valid shader, nullptr when nothing is found
+ T* TryGet(VAddr addr) const {
+ std::scoped_lock lock{lookup_mutex};
+
+ const auto it = lookup_cache.find(addr);
+ if (it == lookup_cache.end()) {
+ return nullptr;
+ }
+ return it->second->data;
+ }
+
+protected:
+ explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
+
+ /// @brief Register in the cache a given entry
+ /// @param data Shader to store in the cache
+ /// @param addr Start address of the shader that will be registered
+ /// @param size Size in bytes of the shader
+ void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
+ std::scoped_lock lock{invalidation_mutex, lookup_mutex};
+
+ const VAddr addr_end = addr + size;
+ Entry* const entry = NewEntry(addr, addr_end, data.get());
+
+ const u64 page_end = addr_end >> PAGE_SHIFT;
+ for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
+ invalidation_cache[page].push_back(entry);
+ }
+
+ storage.push_back(std::move(data));
+
+ rasterizer.UpdatePagesCachedCount(addr, size, 1);
+ }
+
+ /// @brief Called when a shader is going to be removed
+ /// @param shader Shader that will be removed
+ /// @pre invalidation_cache is locked
+ /// @pre lookup_mutex is locked
+ virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
+
+private:
+ /// @brief Invalidate pages in a given region
+ /// @pre invalidation_mutex is locked
+ void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
+ const VAddr addr_end = addr + size;
+ const u64 page_end = addr_end >> PAGE_SHIFT;
+ for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
+ const auto it = invalidation_cache.find(page);
+ if (it == invalidation_cache.end()) {
+ continue;
+ }
+
+ std::vector<Entry*>& entries = it->second;
+ InvalidatePageEntries(entries, addr, addr_end);
+
+ // If there's nothing else in this page, remove it to avoid overpopulating the hash map.
+ if (entries.empty()) {
+ invalidation_cache.erase(it);
+ }
+ }
+ }
+
+ /// @brief Remove shaders marked for deletion
+ /// @pre invalidation_mutex is locked
+ void RemovePendingShaders() {
+ if (marked_for_removal.empty()) {
+ return;
+ }
+ std::scoped_lock lock{lookup_mutex};
+
+ std::vector<T*> removed_shaders;
+ removed_shaders.reserve(marked_for_removal.size());
+
+ for (Entry* const entry : marked_for_removal) {
+ if (lookup_cache.erase(entry->addr_start) > 0) {
+ removed_shaders.push_back(entry->data);
+ }
+ }
+ marked_for_removal.clear();
+
+ if (!removed_shaders.empty()) {
+ RemoveShadersFromStorage(std::move(removed_shaders));
+ }
+ }
+
+ /// @brief Invalidates entries in a given range for the passed page
+ /// @param entries Vector of entries in the page, it will be modified on overlaps
+ /// @param addr Start address of the invalidation
+ /// @param addr_end Non-inclusive end address of the invalidation
+ /// @pre invalidation_mutex is locked
+ void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
+ auto it = entries.begin();
+ while (it != entries.end()) {
+ Entry* const entry = *it;
+ if (!entry->Overlaps(addr, addr_end)) {
+ ++it;
+ continue;
+ }
+ UnmarkMemory(entry);
+ marked_for_removal.push_back(entry);
+
+ it = entries.erase(it);
+ }
+ }
+
+ /// @brief Unmarks an entry from the rasterizer cache
+ /// @param entry Entry to unmark from memory
+ void UnmarkMemory(Entry* entry) {
+ if (!entry->is_memory_marked) {
+ return;
+ }
+ entry->is_memory_marked = false;
+
+ const VAddr addr = entry->addr_start;
+ const std::size_t size = entry->addr_end - addr;
+ rasterizer.UpdatePagesCachedCount(addr, size, -1);
+ }
+
+ /// @brief Removes a vector of shaders from a list
+ /// @param removed_shaders Shaders to be removed from the storage, it can contain duplicates
+ /// @pre invalidation_mutex is locked
+ /// @pre lookup_mutex is locked
+ void RemoveShadersFromStorage(std::vector<T*> removed_shaders) {
+ // Remove duplicates
+ std::sort(removed_shaders.begin(), removed_shaders.end());
+ removed_shaders.erase(std::unique(removed_shaders.begin(), removed_shaders.end()),
+ removed_shaders.end());
+
+ // Now that there are no duplicates, we can notify removals
+ for (T* const shader : removed_shaders) {
+ OnShaderRemoval(shader);
+ }
+
+ // Remove them from the cache
+ const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) {
+ return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
+ removed_shaders.end();
+ };
+ storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end());
+ }
+
+ /// @brief Creates a new entry in the lookup cache and returns its pointer
+ /// @pre lookup_mutex is locked
+ Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) {
+ auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
+ Entry* const entry_pointer = entry.get();
+
+ lookup_cache.emplace(addr, std::move(entry));
+ return entry_pointer;
+ }
+
+ VideoCore::RasterizerInterface& rasterizer;
+
+ mutable std::mutex lookup_mutex;
+ std::mutex invalidation_mutex;
+
+ std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
+ std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
+ std::vector<std::unique_ptr<T>> storage;
+ std::vector<Entry*> marked_for_removal;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 715f39d0d..94d3a6ae5 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -248,12 +248,11 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
// Use an extra temporal buffer
auto& tmp_buffer = staging_cache.GetBuffer(1);
- // Special case for 3D Texture Segments
- const bool must_read_current_data =
- params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D;
tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data();
- if (must_read_current_data) {
+
+ if (params.target == SurfaceTarget::Texture3D) {
+ // Special case for 3D texture segments
memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
}
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 79e10ffbb..173f2edba 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -217,8 +217,8 @@ public:
}
bool IsProtected() const {
- // Only 3D Slices are to be protected
- return is_target && params.block_depth > 0;
+ // Only 3D slices are to be protected
+ return is_target && params.target == SurfaceTarget::Texture3D;
}
bool IsRenderTarget() const {
@@ -250,6 +250,11 @@ public:
return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
}
+ TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
+ return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth,
+ base_level, num_levels));
+ }
+
std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
const GPUVAddr view_addr,
const std::size_t candidate_size, const u32 mipmap,
@@ -272,8 +277,8 @@ public:
std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
const std::size_t candidate_size) {
if (params.target == SurfaceTarget::Texture3D ||
- (params.num_levels == 1 && !params.is_layered) ||
- view_params.target == SurfaceTarget::Texture3D) {
+ view_params.target == SurfaceTarget::Texture3D ||
+ (params.num_levels == 1 && !params.is_layered)) {
return {};
}
const auto layer_mipmap{GetLayerMipmap(view_addr)};
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 884fabffe..0b2b2b8c4 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -215,10 +215,19 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
params.num_levels = 1;
params.emulated_levels = 1;
- const bool is_layered = config.layers > 1 && params.block_depth == 0;
- params.is_layered = is_layered;
- params.depth = is_layered ? config.layers.Value() : 1;
- params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
+ if (config.memory_layout.is_3d != 0) {
+ params.depth = config.layers.Value();
+ params.is_layered = false;
+ params.target = SurfaceTarget::Texture3D;
+ } else if (config.layers > 1) {
+ params.depth = config.layers.Value();
+ params.is_layered = true;
+ params.target = SurfaceTarget::Texture2DArray;
+ } else {
+ params.depth = 1;
+ params.is_layered = false;
+ params.target = SurfaceTarget::Texture2D;
+ }
return params;
}
@@ -237,7 +246,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
params.width = config.width;
params.height = config.height;
params.pitch = config.pitch;
- // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
+ // TODO(Rodrigo): Try to guess texture arrays from parameters
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.num_levels = 1;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 6f63217a2..b543fc8c0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -298,15 +298,13 @@ public:
const GPUVAddr src_gpu_addr = src_config.Address();
const GPUVAddr dst_gpu_addr = dst_config.Address();
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
- const std::optional<VAddr> dst_cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
- const std::optional<VAddr> src_cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
- std::pair<TSurface, TView> dst_surface =
- GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
- std::pair<TSurface, TView> src_surface =
- GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
- ImageBlit(src_surface.second, dst_surface.second, copy_config);
+
+ const auto& memory_manager = system.GPU().MemoryManager();
+ const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr);
+ const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr);
+ std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
+ TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
+ ImageBlit(src_surface, dst_surface.second, copy_config);
dst_surface.first->MarkAsModified(true, Tick());
}
@@ -508,12 +506,12 @@ private:
return RecycleStrategy::Flush;
}
// 3D Textures decision
- if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
+ if (params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
}
for (const auto& s : overlaps) {
const auto& s_params = s->GetSurfaceParams();
- if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
+ if (s_params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
}
}
@@ -731,51 +729,9 @@ private:
*/
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
const SurfaceParams& params,
- const GPUVAddr gpu_addr,
- const VAddr cpu_addr,
+ GPUVAddr gpu_addr, VAddr cpu_addr,
bool preserve_contents) {
- if (params.target == SurfaceTarget::Texture3D) {
- bool failed = false;
- if (params.num_levels > 1) {
- // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
- return std::nullopt;
- }
- TSurface new_surface = GetUncachedSurface(gpu_addr, params);
- bool modified = false;
- for (auto& surface : overlaps) {
- const SurfaceParams& src_params = surface->GetSurfaceParams();
- if (src_params.target != SurfaceTarget::Texture2D) {
- failed = true;
- break;
- }
- if (src_params.height != params.height) {
- failed = true;
- break;
- }
- if (src_params.block_depth != params.block_depth ||
- src_params.block_height != params.block_height) {
- failed = true;
- break;
- }
- const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
- const auto offsets = params.GetBlockOffsetXYZ(offset);
- const auto z = std::get<2>(offsets);
- modified |= surface->IsModified();
- const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
- 1);
- ImageCopy(surface, new_surface, copy_params);
- }
- if (failed) {
- return std::nullopt;
- }
- for (const auto& surface : overlaps) {
- Unregister(surface);
- }
- new_surface->MarkAsModified(modified, Tick());
- Register(new_surface);
- auto view = new_surface->GetMainView();
- return {{std::move(new_surface), view}};
- } else {
+ if (params.target != SurfaceTarget::Texture3D) {
for (const auto& surface : overlaps) {
if (!surface->MatchTarget(params.target)) {
if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
@@ -791,11 +747,60 @@ private:
continue;
}
if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
- return {{surface, surface->GetMainView()}};
+ return std::make_pair(surface, surface->GetMainView());
}
}
return InitializeSurface(gpu_addr, params, preserve_contents);
}
+
+ if (params.num_levels > 1) {
+ // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
+ return std::nullopt;
+ }
+
+ if (overlaps.size() == 1) {
+ const auto& surface = overlaps[0];
+ const SurfaceParams& overlap_params = surface->GetSurfaceParams();
+ // Don't attempt to render to textures with more than one level for now
+ // The texture has to be to the right or the sample address if we want to render to it
+ if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
+ const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
+ const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
+ if (slice < overlap_params.depth) {
+ auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
+ return std::make_pair(std::move(surface), std::move(view));
+ }
+ }
+ }
+
+ TSurface new_surface = GetUncachedSurface(gpu_addr, params);
+ bool modified = false;
+
+ for (auto& surface : overlaps) {
+ const SurfaceParams& src_params = surface->GetSurfaceParams();
+ if (src_params.target != SurfaceTarget::Texture2D ||
+ src_params.height != params.height ||
+ src_params.block_depth != params.block_depth ||
+ src_params.block_height != params.block_height) {
+ return std::nullopt;
+ }
+ modified |= surface->IsModified();
+
+ const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
+ const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
+ const u32 width = params.width;
+ const u32 height = params.height;
+ const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
+ ImageCopy(surface, new_surface, copy_params);
+ }
+ for (const auto& surface : overlaps) {
+ Unregister(surface);
+ }
+ new_surface->MarkAsModified(modified, Tick());
+ Register(new_surface);
+
+ TView view = new_surface->GetMainView();
+ return std::make_pair(std::move(new_surface), std::move(view));
}
/**
@@ -873,7 +878,7 @@ private:
}
}
- // Check if it's a 3D texture
+ // Manage 3D textures
if (params.block_depth > 0) {
auto surface =
Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);