summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorReinUsesLisp <reinuseslisp@airmail.cc>2021-05-23 09:28:34 +0200
committerameerj <52414509+ameerj@users.noreply.github.com>2021-07-23 03:51:30 +0200
commitd621e96d0de212cc16897eadf71e8a1b2e1eb5dc (patch)
tree8695f2f4dddf2564b63e4574d6616ccb0e79568c
parentspirv: Be aware of NAN unaware drivers (diff)
downloadyuzu-d621e96d0de212cc16897eadf71e8a1b2e1eb5dc.tar
yuzu-d621e96d0de212cc16897eadf71e8a1b2e1eb5dc.tar.gz
yuzu-d621e96d0de212cc16897eadf71e8a1b2e1eb5dc.tar.bz2
yuzu-d621e96d0de212cc16897eadf71e8a1b2e1eb5dc.tar.lz
yuzu-d621e96d0de212cc16897eadf71e8a1b2e1eb5dc.tar.xz
yuzu-d621e96d0de212cc16897eadf71e8a1b2e1eb5dc.tar.zst
yuzu-d621e96d0de212cc16897eadf71e8a1b2e1eb5dc.zip
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.cpp4
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.h1
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp7
-rw-r--r--src/video_core/CMakeLists.txt4
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h53
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp37
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h40
-rw-r--r--src/video_core/renderer_opengl/gl_compute_program.cpp178
-rw-r--r--src/video_core/renderer_opengl/gl_compute_program.h83
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp89
-rw-r--r--src/video_core/renderer_opengl/gl_device.h16
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_program.cpp296
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_program.h105
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp23
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp275
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h98
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp146
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h73
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp257
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h29
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h108
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp17
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h5
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp13
-rw-r--r--src/video_core/renderer_vulkan/pipeline_helper.h17
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp22
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp22
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp23
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp11
-rw-r--r--src/video_core/shader_cache.cpp17
-rw-r--r--src/video_core/shader_cache.h23
-rw-r--r--src/video_core/shader_environment.cpp4
-rw-r--r--src/video_core/shader_environment.h16
-rw-r--r--src/video_core/texture_cache/formatter.cpp4
-rw-r--r--src/video_core/texture_cache/formatter.h3
-rw-r--r--src/video_core/textures/texture.h9
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp2
38 files changed, 1427 insertions, 705 deletions
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index b3c9fe72a..5913fdeff 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -355,6 +355,10 @@ U32 IREmitter::WorkgroupIdZ() {
return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)};
}
+Value IREmitter::LocalInvocationId() {
+ return Inst(Opcode::LocalInvocationId);
+}
+
U32 IREmitter::LocalInvocationIdX() {
return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)};
}
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 4441c495d..a12919283 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -95,6 +95,7 @@ public:
[[nodiscard]] U32 WorkgroupIdY();
[[nodiscard]] U32 WorkgroupIdZ();
+ [[nodiscard]] Value LocalInvocationId();
[[nodiscard]] U32 LocalInvocationIdX();
[[nodiscard]] U32 LocalInvocationIdY();
[[nodiscard]] U32 LocalInvocationIdZ();
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
index b0baff74b..01fb6f5e5 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -120,6 +120,13 @@ enum class SpecialRegister : u64 {
case SpecialRegister::SR_INVOCATION_INFO:
// LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed");
return ir.Imm32(0x00ff'0000);
+ case SpecialRegister::SR_TID: {
+ const IR::Value tid{ir.LocalInvocationId()};
+ return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)},
+ IR::U32{ir.CompositeExtract(tid, 1)},
+ ir.Imm32(16), ir.Imm32(8)),
+ IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6));
+ }
case SpecialRegister::SR_TID_X:
return ir.LocalInvocationIdX();
case SpecialRegister::SR_TID_Y:
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6e0e4b8f5..b008c37c0 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -67,10 +67,14 @@ add_library(video_core STATIC
renderer_base.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
+ renderer_opengl/gl_compute_program.cpp
+ renderer_opengl/gl_compute_program.h
renderer_opengl/gl_device.cpp
renderer_opengl/gl_device.h
renderer_opengl/gl_fence_manager.cpp
renderer_opengl/gl_fence_manager.h
+ renderer_opengl/gl_graphics_program.cpp
+ renderer_opengl/gl_graphics_program.h
renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_resource_manager.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 29746f61d..6c92e4c30 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -70,8 +70,8 @@ class BufferCache {
P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
- static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX;
static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
+ static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
static constexpr BufferId NULL_BUFFER_ID{0};
@@ -154,7 +154,7 @@ public:
void UnbindGraphicsTextureBuffers(size_t stage);
void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size,
- PixelFormat format, bool is_written);
+ PixelFormat format, bool is_written, bool is_image);
void UnbindComputeStorageBuffers();
@@ -164,7 +164,7 @@ public:
void UnbindComputeTextureBuffers();
void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
- bool is_written);
+ bool is_written, bool is_image);
void FlushCachedWrites();
@@ -197,6 +197,7 @@ public:
[[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
std::mutex mutex;
+ Runtime& runtime;
private:
template <typename Func>
@@ -366,7 +367,6 @@ private:
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
Core::Memory::Memory& cpu_memory;
- Runtime& runtime;
SlotVector<Buffer> slot_buffers;
DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
@@ -394,8 +394,10 @@ private:
std::array<u32, NUM_STAGES> enabled_texture_buffers{};
std::array<u32, NUM_STAGES> written_texture_buffers{};
+ std::array<u32, NUM_STAGES> image_texture_buffers{};
u32 enabled_compute_texture_buffers = 0;
u32 written_compute_texture_buffers = 0;
+ u32 image_compute_texture_buffers = 0;
std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
@@ -431,8 +433,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
Runtime& runtime_)
- : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
- gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
+ : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+ kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
// Ensure the first slot is used for the null buffer
void(slot_buffers.insert(runtime, NullBufferParams{}));
deletion_iterator = slot_buffers.end();
@@ -703,13 +705,18 @@ template <class P>
void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) {
enabled_texture_buffers[stage] = 0;
written_texture_buffers[stage] = 0;
+ image_texture_buffers[stage] = 0;
}
template <class P>
void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr,
- u32 size, PixelFormat format, bool is_written) {
+ u32 size, PixelFormat format, bool is_written,
+ bool is_image) {
enabled_texture_buffers[stage] |= 1U << tbo_index;
written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
+ if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+ image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
+ }
texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
}
@@ -717,6 +724,7 @@ template <class P>
void BufferCache<P>::UnbindComputeStorageBuffers() {
enabled_compute_storage_buffers = 0;
written_compute_storage_buffers = 0;
+ image_compute_texture_buffers = 0;
}
template <class P>
@@ -737,13 +745,17 @@ template <class P>
void BufferCache<P>::UnbindComputeTextureBuffers() {
enabled_compute_texture_buffers = 0;
written_compute_texture_buffers = 0;
+ image_compute_texture_buffers = 0;
}
template <class P>
void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size,
- PixelFormat format, bool is_written) {
+ PixelFormat format, bool is_written, bool is_image) {
enabled_compute_texture_buffers |= 1U << tbo_index;
written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
+ if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+ image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
+ }
compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
}
@@ -1057,7 +1069,6 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
template <class P>
void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
- u32 binding_index = 0;
ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
const TextureBufferBinding& binding = texture_buffers[stage][index];
Buffer& buffer = slot_buffers[binding.buffer_id];
@@ -1066,9 +1077,12 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format;
- if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) {
- runtime.BindTextureBuffer(binding_index, buffer, offset, size, format);
- ++binding_index;
+ if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+ if (((image_texture_buffers[stage] >> index) & 1) != 0) {
+ runtime.BindImageBuffer(buffer, offset, size, format);
+ } else {
+ runtime.BindTextureBuffer(buffer, offset, size, format);
+ }
} else {
runtime.BindTextureBuffer(buffer, offset, size, format);
}
@@ -1139,7 +1153,6 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
template <class P>
void BufferCache<P>::BindHostComputeTextureBuffers() {
- u32 binding_index = 0;
ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
const TextureBufferBinding& binding = compute_texture_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
@@ -1148,9 +1161,12 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format;
- if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) {
- runtime.BindTextureBuffer(binding_index, buffer, offset, size, format);
- ++binding_index;
+ if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+ if (((image_compute_texture_buffers >> index) & 1) != 0) {
+ runtime.BindImageBuffer(buffer, offset, size, format);
+ } else {
+ runtime.BindTextureBuffer(buffer, offset, size, format);
+ }
} else {
runtime.BindTextureBuffer(buffer, offset, size, format);
}
@@ -1339,11 +1355,10 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
// Resolve buffer
Binding& binding = compute_storage_buffers[index];
- const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
- binding.buffer_id = buffer_id;
+ binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
// Mark as written if needed
if (((written_compute_storage_buffers >> index) & 1) != 0) {
- MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
+ MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
}
});
}
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index c4189fb60..2d0ef1307 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,14 +2,18 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <span>
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL {
namespace {
+using VideoCore::Surface::PixelFormat;
+
struct BindlessSSBO {
GLuint64EXT address;
GLsizei length;
@@ -62,6 +66,26 @@ void Buffer::MakeResident(GLenum access) noexcept {
glMakeNamedBufferResidentNV(buffer.handle, access);
}
+GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
+ const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
+ return offset == view.offset && size == view.size && format == view.format;
+ })};
+ if (it != views.end()) {
+ return it->texture.handle;
+ }
+ OGLTexture texture;
+ texture.Create(GL_TEXTURE_BUFFER);
+ const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format};
+ glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size);
+ views.push_back({
+ .offset = offset,
+ .size = size,
+ .format = format,
+ .texture = std::move(texture),
+ });
+ return views.back().texture.handle;
+}
+
BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
: device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
use_assembly_shaders{device.UseAssemblyShaders()},
@@ -144,7 +168,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
static_cast<GLsizeiptr>(size));
} else {
- const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -181,7 +205,7 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
} else {
- const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
+ const GLuint base_binding = graphics_base_storage_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -213,4 +237,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer,
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
+void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
+ PixelFormat format) {
+ *texture_handles++ = buffer.View(offset, size, format);
+}
+
+void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) {
+ *image_handles++ = buffer.View(offset, size, format);
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index ddcce5e97..4986c65fd 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -32,6 +32,8 @@ public:
void MakeResident(GLenum access) noexcept;
+ [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
+
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
return address;
}
@@ -41,9 +43,17 @@ public:
}
private:
+ struct BufferView {
+ u32 offset;
+ u32 size;
+ VideoCore::Surface::PixelFormat format;
+ OGLTexture texture;
+ };
+
GLuint64EXT address = 0;
OGLBuffer buffer;
GLenum current_residency_access = GL_NONE;
+ std::vector<BufferView> views;
};
class BufferCacheRuntime {
@@ -75,13 +85,19 @@ public:
void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
+ void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
+ VideoCore::Surface::PixelFormat format);
+
+ void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
+ VideoCore::Surface::PixelFormat format);
+
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
if (use_assembly_shaders) {
const GLuint handle = fast_uniforms[stage][binding_index].handle;
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
} else {
- const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding,
fast_uniforms[stage][binding_index].handle, 0,
@@ -103,7 +119,7 @@ public:
std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
- const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -118,6 +134,19 @@ public:
return has_fast_buffer_sub_data;
}
+ void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) {
+ graphics_base_uniform_bindings = bindings;
+ }
+
+ void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) {
+ graphics_base_storage_bindings = bindings;
+ }
+
+ void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) {
+ texture_handles = texture_handles_;
+ image_handles = image_handles_;
+ }
+
private:
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -133,6 +162,11 @@ private:
u32 max_attributes = 0;
+ std::array<GLuint, 5> graphics_base_uniform_bindings{};
+ std::array<GLuint, 5> graphics_base_storage_bindings{};
+ GLuint* texture_handles = nullptr;
+ GLuint* image_handles = nullptr;
+
std::optional<StreamBuffer> stream_buffer;
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
@@ -155,8 +189,8 @@ struct BufferCacheParams {
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
- static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = true;
static constexpr bool USE_MEMORY_MAPS = false;
+ static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp
new file mode 100644
index 000000000..d5ef65439
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_compute_program.cpp
@@ -0,0 +1,178 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+
+#include "common/cityhash.h"
+#include "video_core/renderer_opengl/gl_compute_program.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+
+namespace OpenGL {
+
+using Shader::ImageBufferDescriptor;
+using Tegra::Texture::TexturePair;
+using VideoCommon::ImageId;
+
+constexpr u32 MAX_TEXTURES = 64;
+constexpr u32 MAX_IMAGES = 16;
+
+size_t ComputeProgramKey::Hash() const noexcept {
+ return static_cast<size_t>(
+ Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
+}
+
+bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept {
+ return std::memcmp(this, &rhs, sizeof *this) == 0;
+}
+
+ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ Tegra::MemoryManager& gpu_memory_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ ProgramManager& program_manager_, OGLProgram program_,
+ const Shader::Info& info_)
+ : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
+ kepler_compute{kepler_compute_},
+ program_manager{program_manager_}, program{std::move(program_)}, info{info_} {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ num_texture_buffers += desc.count;
+ }
+ for (const auto& desc : info.image_buffer_descriptors) {
+ num_image_buffers += desc.count;
+ }
+ u32 num_textures = num_texture_buffers;
+ for (const auto& desc : info.texture_descriptors) {
+ num_textures += desc.count;
+ }
+ ASSERT(num_textures <= MAX_TEXTURES);
+
+ u32 num_images = num_image_buffers;
+ for (const auto& desc : info.image_descriptors) {
+ num_images += desc.count;
+ }
+ ASSERT(num_images <= MAX_IMAGES);
+}
+
+void ComputeProgram::Configure() {
+ buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask);
+ buffer_cache.UnbindComputeStorageBuffers();
+ size_t ssbo_index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ ASSERT(desc.count == 1);
+ buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
+ desc.is_written);
+ ++ssbo_index;
+ }
+ texture_cache.SynchronizeComputeDescriptors();
+
+ std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
+ boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
+ std::array<GLuint, MAX_TEXTURES> samplers;
+ std::array<GLuint, MAX_TEXTURES> textures;
+ std::array<GLuint, MAX_IMAGES> images;
+ GLsizei sampler_binding{};
+ GLsizei texture_binding{};
+ GLsizei image_binding{};
+
+ const auto& qmd{kepler_compute.launch_description};
+ const auto& cbufs{qmd.const_buffer_config};
+ const bool via_header_index{qmd.linked_tsc != 0};
+ const auto read_handle{[&](const auto& desc, u32 index) {
+ ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
+ const u32 index_offset{index << desc.size_shift};
+ const u32 offset{desc.cbuf_offset + index_offset};
+ const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
+ if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
+ std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
+ if (desc.has_secondary) {
+ ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
+ const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
+ const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
+ secondary_offset};
+ const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
+ const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ return TexturePair(lhs_raw | rhs_raw, via_header_index);
+ }
+ }
+ return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
+ }};
+ const auto add_image{[&](const auto& desc) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+ }
+ }};
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+ samplers[sampler_binding++] = 0;
+ }
+ }
+ std::ranges::for_each(info.image_buffer_descriptors, add_image);
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+
+ Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
+ samplers[sampler_binding++] = sampler->Handle();
+ }
+ }
+ std::ranges::for_each(info.image_descriptors, add_image);
+
+ const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ texture_cache.FillComputeImageViews(indices_span, image_view_ids);
+
+ buffer_cache.UnbindComputeTextureBuffers();
+ size_t texbuf_index{};
+ const auto add_buffer{[&](const auto& desc) {
+ constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
+ for (u32 i = 0; i < desc.count; ++i) {
+ bool is_written{false};
+ if constexpr (is_image) {
+ is_written = desc.is_written;
+ }
+ ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])};
+ buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
+ image_view.BufferSize(), image_view.format,
+ is_written, is_image);
+ ++texbuf_index;
+ }
+ }};
+ std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
+ std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
+
+ buffer_cache.UpdateComputeBuffers();
+
+ buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
+ buffer_cache.BindHostComputeBuffers();
+
+ const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers};
+ texture_binding += num_texture_buffers;
+ image_binding += num_image_buffers;
+
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ textures[texture_binding++] = image_view.Handle(desc.type);
+ }
+ }
+ for (const auto& desc : info.image_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ images[image_binding++] = image_view.Handle(desc.type);
+ }
+ }
+ if (texture_binding != 0) {
+ ASSERT(texture_binding == sampler_binding);
+ glBindTextures(0, texture_binding, textures.data());
+ glBindSamplers(0, sampler_binding, samplers.data());
+ }
+ if (image_binding != 0) {
+ glBindImageTextures(0, image_binding, images.data());
+ }
+ program_manager.BindProgram(program.handle);
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h
new file mode 100644
index 000000000..64a75d44d
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_compute_program.h
@@ -0,0 +1,83 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <type_traits>
+#include <utility>
+
+#include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Tegra::Engines {
+class KeplerCompute;
+}
+
+namespace Shader {
+struct Info;
+}
+
+namespace OpenGL {
+
+class ProgramManager;
+
+struct ComputeProgramKey {
+ u64 unique_hash;
+ u32 shared_memory_size;
+ std::array<u32, 3> workgroup_size;
+
+ size_t Hash() const noexcept;
+
+ bool operator==(const ComputeProgramKey&) const noexcept;
+
+ bool operator!=(const ComputeProgramKey& rhs) const noexcept {
+ return !operator==(rhs);
+ }
+};
+static_assert(std::has_unique_object_representations_v<ComputeProgramKey>);
+static_assert(std::is_trivially_copyable_v<ComputeProgramKey>);
+static_assert(std::is_trivially_constructible_v<ComputeProgramKey>);
+
+class ComputeProgram {
+public:
+ explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ Tegra::MemoryManager& gpu_memory_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ ProgramManager& program_manager_, OGLProgram program_,
+ const Shader::Info& info_);
+
+ void Configure();
+
+private:
+ TextureCache& texture_cache;
+ BufferCache& buffer_cache;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ ProgramManager& program_manager;
+
+ OGLProgram program;
+ Shader::Info info;
+
+ u32 num_texture_buffers{};
+ u32 num_image_buffers{};
+};
+
+} // namespace OpenGL
+
+namespace std {
+template <>
+struct hash<OpenGL::ComputeProgramKey> {
+ size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept {
+ return k.Hash();
+ }
+};
+} // namespace std
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 3b00614e7..18bbc4c1f 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -22,34 +22,11 @@
namespace OpenGL {
namespace {
-// One uniform block is reserved for emulation purposes
-constexpr u32 ReservedUniformBlocks = 1;
-
-constexpr u32 NumStages = 5;
-
constexpr std::array LIMIT_UBOS = {
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
};
-constexpr std::array LIMIT_SSBOS = {
- GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
- GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
- GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
-};
-constexpr std::array LIMIT_SAMPLERS = {
- GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
- GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
- GL_MAX_TEXTURE_IMAGE_UNITS,
- GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
-};
-constexpr std::array LIMIT_IMAGES = {
- GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
- GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
- GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
-};
template <typename T>
T GetInteger(GLenum pname) {
@@ -82,15 +59,6 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_view
return std::ranges::find(extensions, extension) != extensions.end();
}
-u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
- ASSERT(num >= amount);
- if (limit) {
- amount = std::min(amount, GetInteger<u32>(*limit));
- }
- num -= amount;
- return std::exchange(base, base + amount);
-}
-
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
std::ranges::transform(LIMIT_UBOS, max.begin(),
@@ -98,62 +66,6 @@ std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcep
return max;
}
-std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
- std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
-
- static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4};
- const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
- const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
- const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
-
- u32 num_ubos = total_ubos - ReservedUniformBlocks;
- u32 num_ssbos = total_ssbos;
- u32 num_samplers = total_samplers;
-
- u32 base_ubo = ReservedUniformBlocks;
- u32 base_ssbo = 0;
- u32 base_samplers = 0;
-
- for (std::size_t i = 0; i < NumStages; ++i) {
- const std::size_t stage = stage_swizzle[i];
- bindings[stage] = {
- Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
- Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
- Extract(base_samplers, num_samplers, total_samplers / NumStages,
- LIMIT_SAMPLERS[stage])};
- }
-
- u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
- u32 base_images = 0;
-
- // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
- // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
- // fragment stage, and at least 1 for the rest of the stages.
- // So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
-
- // Reserve at least 4 image bindings on the fragment stage.
- bindings[4].image =
- Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
-
- // This is guaranteed to be at least 1.
- const u32 total_extracted_images = num_images / (NumStages - 1);
-
- // Reserve the other image bindings.
- for (std::size_t i = 0; i < NumStages; ++i) {
- const std::size_t stage = stage_swizzle[i];
- if (stage == 4) {
- continue;
- }
- bindings[stage].image =
- Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
- }
-
- // Compute doesn't care about any of this.
- bindings[5] = {0, 0, 0, 0};
-
- return bindings;
-}
-
bool IsASTCSupported() {
static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
static constexpr std::array formats = {
@@ -225,7 +137,6 @@ Device::Device() {
}
max_uniform_buffers = BuildMaxUniformBuffers();
- base_bindings = BuildBaseBindings();
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 2c2b13767..152a3acd3 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -12,13 +12,6 @@ namespace OpenGL {
class Device {
public:
- struct BaseBindings {
- u32 uniform_buffer{};
- u32 shader_storage_buffer{};
- u32 sampler{};
- u32 image{};
- };
-
explicit Device();
explicit Device(std::nullptr_t);
@@ -28,14 +21,6 @@ public:
return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
}
- const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
- return base_bindings[stage_index];
- }
-
- const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
- return GetBaseBindings(static_cast<std::size_t>(shader_type));
- }
-
size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
@@ -134,7 +119,6 @@ private:
std::string vendor_name;
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
- std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
size_t uniform_buffer_alignment{};
size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp
new file mode 100644
index 000000000..fd0958719
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp
@@ -0,0 +1,296 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+
+#include "common/cityhash.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_opengl/gl_graphics_program.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
+#include "video_core/texture_cache/texture_cache.h"
+
+namespace OpenGL {
+
+using Shader::ImageBufferDescriptor;
+using Tegra::Texture::TexturePair;
+using VideoCommon::ImageId;
+
+constexpr u32 MAX_TEXTURES = 64;
+constexpr u32 MAX_IMAGES = 8;
+
+size_t GraphicsProgramKey::Hash() const noexcept {
+ return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size()));
+}
+
+bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept {
+ return std::memcmp(this, &rhs, Size()) == 0;
+}
+
+GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ Tegra::MemoryManager& gpu_memory_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_,
+ OGLProgram program_,
+ const std::array<const Shader::Info*, 5>& infos)
+ : texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
+ gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_},
+ state_tracker{state_tracker_}, program{std::move(program_)} {
+ std::ranges::transform(infos, stage_infos.begin(),
+ [](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
+
+ u32 num_textures{};
+ u32 num_images{};
+ for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) {
+ const auto& info{stage_infos[stage]};
+ base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
+ base_storage_bindings[stage + 1] = base_storage_bindings[stage];
+ for (const auto& desc : info.constant_buffer_descriptors) {
+ base_uniform_bindings[stage + 1] += desc.count;
+ }
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ base_storage_bindings[stage + 1] += desc.count;
+ }
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ num_texture_buffers[stage] += desc.count;
+ num_textures += desc.count;
+ }
+ for (const auto& desc : info.image_buffer_descriptors) {
+ num_image_buffers[stage] += desc.count;
+ num_images += desc.count;
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ num_textures += desc.count;
+ }
+ for (const auto& desc : info.image_descriptors) {
+ num_images += desc.count;
+ }
+ }
+ ASSERT(num_textures <= MAX_TEXTURES);
+ ASSERT(num_images <= MAX_IMAGES);
+}
+
+struct Spec {
+ static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
+ static constexpr bool has_storage_buffers = true;
+ static constexpr bool has_texture_buffers = true;
+ static constexpr bool has_image_buffers = true;
+ static constexpr bool has_images = true;
+};
+
+void GraphicsProgram::Configure(bool is_indexed) {
+ std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
+ std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
+ std::array<GLuint, MAX_TEXTURES> samplers;
+ size_t image_view_index{};
+ GLsizei sampler_binding{};
+
+ texture_cache.SynchronizeGraphicsDescriptors();
+
+ buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
+ buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
+
+ const auto& regs{maxwell3d.regs};
+ const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
+ const auto config_stage{[&](size_t stage) {
+ const Shader::Info& info{stage_infos[stage]};
+ buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask);
+ buffer_cache.UnbindGraphicsStorageBuffers(stage);
+ if constexpr (Spec::has_storage_buffers) {
+ size_t ssbo_index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ ASSERT(desc.count == 1);
+ buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
+ desc.cbuf_offset, desc.is_written);
+ ++ssbo_index;
+ }
+ }
+ const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
+ const auto read_handle{[&](const auto& desc, u32 index) {
+ ASSERT(cbufs[desc.cbuf_index].enabled);
+ const u32 index_offset{index << desc.size_shift};
+ const u32 offset{desc.cbuf_offset + index_offset};
+ const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
+ if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
+ std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
+ if (desc.has_secondary) {
+ ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
+ const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
+ const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
+ second_offset};
+ const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
+ const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ const u32 raw{lhs_raw | rhs_raw};
+ return TexturePair(raw, via_header_index);
+ }
+ }
+ return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
+ }};
+ const auto add_image{[&](const auto& desc) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_view_index++] = handle.first;
+ }
+ }};
+ if constexpr (Spec::has_texture_buffers) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_view_index++] = handle.first;
+ samplers[sampler_binding++] = 0;
+ }
+ }
+ }
+ if constexpr (Spec::has_image_buffers) {
+ for (const auto& desc : info.image_buffer_descriptors) {
+ add_image(desc);
+ }
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_view_index++] = handle.first;
+
+ Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
+ samplers[sampler_binding++] = sampler->Handle();
+ }
+ }
+ if constexpr (Spec::has_images) {
+ for (const auto& desc : info.image_descriptors) {
+ add_image(desc);
+ }
+ }
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ config_stage(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ config_stage(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ config_stage(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ config_stage(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ config_stage(4);
+ }
+ const std::span indices_span(image_view_indices.data(), image_view_index);
+ texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+
+ ImageId* texture_buffer_index{image_view_ids.data()};
+ const auto bind_stage_info{[&](size_t stage) {
+ size_t index{};
+ const auto add_buffer{[&](const auto& desc) {
+ constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
+ for (u32 i = 0; i < desc.count; ++i) {
+ bool is_written{false};
+ if constexpr (is_image) {
+ is_written = desc.is_written;
+ }
+ ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
+ buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
+ image_view.BufferSize(), image_view.format,
+ is_written, is_image);
+ ++index;
+ ++texture_buffer_index;
+ }
+ }};
+ const Shader::Info& info{stage_infos[stage]};
+ buffer_cache.UnbindGraphicsTextureBuffers(stage);
+
+ if constexpr (Spec::has_texture_buffers) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ add_buffer(desc);
+ }
+ }
+ if constexpr (Spec::has_image_buffers) {
+ for (const auto& desc : info.image_buffer_descriptors) {
+ add_buffer(desc);
+ }
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ texture_buffer_index += desc.count;
+ }
+ if constexpr (Spec::has_images) {
+ for (const auto& desc : info.image_descriptors) {
+ texture_buffer_index += desc.count;
+ }
+ }
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ bind_stage_info(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ bind_stage_info(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ bind_stage_info(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ bind_stage_info(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ bind_stage_info(4);
+ }
+ buffer_cache.UpdateGraphicsBuffers(is_indexed);
+ buffer_cache.BindHostGeometryBuffers(is_indexed);
+
+ const ImageId* views_it{image_view_ids.data()};
+ GLsizei texture_binding = 0;
+ GLsizei image_binding = 0;
+ std::array<GLuint, MAX_TEXTURES> textures;
+ std::array<GLuint, MAX_IMAGES> images;
+ const auto prepare_stage{[&](size_t stage) {
+ buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]);
+ buffer_cache.BindHostStageBuffers(stage);
+
+ texture_binding += num_texture_buffers[stage];
+ image_binding += num_image_buffers[stage];
+
+ const auto& info{stage_infos[stage]};
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ textures[texture_binding++] = image_view.Handle(desc.type);
+ }
+ }
+ for (const auto& desc : info.image_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ images[image_binding++] = image_view.Handle(desc.type);
+ }
+ }
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ prepare_stage(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ prepare_stage(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ prepare_stage(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ prepare_stage(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ prepare_stage(4);
+ }
+ if (texture_binding != 0) {
+ ASSERT(texture_binding == sampler_binding);
+ glBindTextures(0, texture_binding, textures.data());
+ glBindSamplers(0, sampler_binding, samplers.data());
+ }
+ if (image_binding != 0) {
+ glBindImageTextures(0, image_binding, images.data());
+ }
+ texture_cache.UpdateRenderTargets(false);
+
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
+ program_manager.BindProgram(program.handle);
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h
new file mode 100644
index 000000000..5adf3f41e
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_graphics_program.h
@@ -0,0 +1,105 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <type_traits>
+#include <utility>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
+
+namespace OpenGL {
+
+class ProgramManager;
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+struct GraphicsProgramKey {
+ struct TransformFeedbackState {
+ struct Layout {
+ u32 stream;
+ u32 varying_count;
+ u32 stride;
+ };
+ std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts;
+ std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings;
+ };
+
+ std::array<u64, 6> unique_hashes;
+ union {
+ u32 raw;
+ BitField<0, 1, u32> xfb_enabled;
+ BitField<1, 1, u32> early_z;
+ BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
+ BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive;
+ BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing;
+ BitField<10, 1, u32> tessellation_clockwise;
+ };
+ std::array<u32, 3> padding;
+ TransformFeedbackState xfb_state;
+
+ size_t Hash() const noexcept;
+
+ bool operator==(const GraphicsProgramKey&) const noexcept;
+
+ bool operator!=(const GraphicsProgramKey& rhs) const noexcept {
+ return !operator==(rhs);
+ }
+
+ [[nodiscard]] size_t Size() const noexcept {
+ if (xfb_enabled != 0) {
+ return sizeof(GraphicsProgramKey);
+ } else {
+ return offsetof(GraphicsProgramKey, padding);
+ }
+ }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>);
+
+class GraphicsProgram {
+public:
+ explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ Tegra::MemoryManager& gpu_memory_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_,
+ OGLProgram program_, const std::array<const Shader::Info*, 5>& infos);
+
+ void Configure(bool is_indexed);
+
+private:
+ TextureCache& texture_cache;
+ BufferCache& buffer_cache;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ ProgramManager& program_manager;
+ StateTracker& state_tracker;
+
+ OGLProgram program;
+ std::array<Shader::Info, 5> stage_infos{};
+ std::array<u32, 5> base_uniform_bindings{};
+ std::array<u32, 5> base_storage_bindings{};
+ std::array<u32, 5> num_texture_buffers{};
+ std::array<u32, 5> num_image_buffers{};
+};
+
+} // namespace OpenGL
+
+namespace std {
+template <>
+struct hash<OpenGL::GraphicsProgramKey> {
+ size_t operator()(const OpenGL::GraphicsProgramKey& k) const noexcept {
+ return k.Hash();
+ }
+};
+} // namespace std
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index dd1937863..e527b76ba 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -98,7 +98,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
buffer_cache_runtime(device),
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
- shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
+ shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
+ buffer_cache, program_manager, state_tracker),
query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
@@ -246,12 +247,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
SyncState();
- // Setup shaders and their used resources.
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()};
- texture_cache.UpdateRenderTargets(false);
- state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
- program_manager.BindGraphicsPipeline();
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ program->Configure(is_indexed);
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
BeginTransformFeedback(primitive_mode);
@@ -293,7 +292,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
num_instances, base_instance);
}
}
-
EndTransformFeedback();
++num_queued_commands;
@@ -302,7 +300,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
void RasterizerOpenGL::DispatchCompute() {
- UNREACHABLE_MSG("Not implemented");
+ ComputeProgram* const program{shader_cache.CurrentComputeProgram()};
+ if (!program) {
+ return;
+ }
+ program->Configure();
+ const auto& qmd{kepler_compute.launch_description};
+ glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
+ ++num_queued_commands;
}
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -515,7 +520,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
// ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
// ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
- screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
+ screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
return true;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index c3e490b40..c9ca1f005 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -16,6 +16,11 @@
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/program.h"
+#include "shader_recompiler/profile.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
@@ -25,17 +30,281 @@
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
#include "video_core/shader_notify.h"
namespace OpenGL {
+namespace {
+// FIXME: Move this somewhere else
+const Shader::Profile profile{
+ .supported_spirv = 0x00010000,
+
+ .unified_descriptor_binding = false,
+ .support_descriptor_aliasing = false,
+ .support_int8 = false,
+ .support_int16 = false,
+ .support_vertex_instance_id = true,
+ .support_float_controls = false,
+ .support_separate_denorm_behavior = false,
+ .support_separate_rounding_mode = false,
+ .support_fp16_denorm_preserve = false,
+ .support_fp32_denorm_preserve = false,
+ .support_fp16_denorm_flush = false,
+ .support_fp32_denorm_flush = false,
+ .support_fp16_signed_zero_nan_preserve = false,
+ .support_fp32_signed_zero_nan_preserve = false,
+ .support_fp64_signed_zero_nan_preserve = false,
+ .support_explicit_workgroup_layout = false,
+ .support_vote = true,
+ .support_viewport_index_layer_non_geometry = true,
+ .support_viewport_mask = true,
+ .support_typeless_image_loads = true,
+ .support_demote_to_helper_invocation = false,
+ .warp_size_potentially_larger_than_guest = true,
+ .support_int64_atomics = false,
+ .lower_left_origin_mode = true,
+
+ .has_broken_spirv_clamp = true,
+ .has_broken_unsigned_image_offsets = true,
+ .has_broken_signed_operations = true,
+ .ignore_nan_fp_comparisons = true,
+
+ .generic_input_types = {},
+ .convert_depth_mode = false,
+ .force_early_z = false,
+
+ .tess_primitive = {},
+ .tess_spacing = {},
+ .tess_clockwise = false,
+
+ .input_topology = Shader::InputTopology::Triangles,
+
+ .fixed_state_point_size = std::nullopt,
+
+ .alpha_test_func = Shader::CompareFunction::Always,
+ .alpha_test_reference = 0.0f,
+
+ .y_negate = false,
+
+ .xfb_varyings = {},
+};
+
+using Shader::Backend::SPIRV::EmitSPIRV;
+using Shader::Maxwell::TranslateProgram;
+using VideoCommon::ComputeEnvironment;
+using VideoCommon::GraphicsEnvironment;
+
+template <typename Container>
+auto MakeSpan(Container& container) {
+ return std::span(container.data(), container.size());
+}
+
+void AddShader(GLenum stage, GLuint program, std::span<const u32> code) {
+ OGLShader shader;
+ shader.handle = glCreateShader(stage);
+
+ glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(),
+ static_cast<GLsizei>(code.size_bytes()));
+ glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr);
+ glAttachShader(program, shader.handle);
+ if (!Settings::values.renderer_debug) {
+ return;
+ }
+ GLint shader_status{};
+ glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status);
+ if (shader_status == GL_FALSE) {
+ LOG_ERROR(Render_OpenGL, "Failed to build shader");
+ }
+ GLint log_length{};
+ glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length);
+ if (log_length == 0) {
+ return;
+ }
+ std::string log(log_length, 0);
+ glGetShaderInfoLog(shader.handle, log_length, nullptr, log.data());
+ if (shader_status == GL_FALSE) {
+ LOG_ERROR(Render_OpenGL, "{}", log);
+ } else {
+ LOG_WARNING(Render_OpenGL, "{}", log);
+ }
+}
+
+void LinkProgram(GLuint program) {
+ glLinkProgram(program);
+ if (!Settings::values.renderer_debug) {
+ return;
+ }
+ GLint link_status{};
+ glGetProgramiv(program, GL_LINK_STATUS, &link_status);
+
+ GLint log_length{};
+ glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);
+ if (log_length == 0) {
+ return;
+ }
+ std::string log(log_length, 0);
+ glGetProgramInfoLog(program, log_length, nullptr, log.data());
+ if (link_status == GL_FALSE) {
+ LOG_ERROR(Render_OpenGL, "{}", log);
+ } else {
+ LOG_WARNING(Render_OpenGL, "{}", log);
+ }
+}
+
+GLenum Stage(size_t stage_index) {
+ switch (stage_index) {
+ case 0:
+ return GL_VERTEX_SHADER;
+ case 1:
+ return GL_TESS_CONTROL_SHADER;
+ case 2:
+ return GL_TESS_EVALUATION_SHADER;
+ case 3:
+ return GL_GEOMETRY_SHADER;
+ case 4:
+ return GL_FRAGMENT_SHADER;
+ }
+ UNREACHABLE_MSG("{}", stage_index);
+ return GL_NONE;
+}
+} // Anonymous namespace
ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
- Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_)
+ Tegra::MemoryManager& gpu_memory_, const Device& device_,
+ TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_)
: VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
- emu_window{emu_window_}, gpu{gpu_}, device{device_} {}
+ emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
+ buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{
+ state_tracker_} {}
ShaderCache::~ShaderCache() = default;
+GraphicsProgram* ShaderCache::CurrentGraphicsProgram() {
+ if (!RefreshStages(graphics_key.unique_hashes)) {
+ return nullptr;
+ }
+ const auto& regs{maxwell3d.regs};
+ graphics_key.raw = 0;
+ graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
+ graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
+ ? regs.draw.topology.Value()
+ : Maxwell::PrimitiveTopology{});
+ graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value());
+ graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value());
+ graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
+
+ const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
+ auto& program{pair->second};
+ if (is_new) {
+ program = CreateGraphicsProgram();
+ }
+ return program.get();
+}
+
+ComputeProgram* ShaderCache::CurrentComputeProgram() {
+ const VideoCommon::ShaderInfo* const shader{ComputeShader()};
+ if (!shader) {
+ return nullptr;
+ }
+ const auto& qmd{kepler_compute.launch_description};
+ const ComputeProgramKey key{
+ .unique_hash = shader->unique_hash,
+ .shared_memory_size = qmd.shared_alloc,
+ .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
+ };
+ const auto [pair, is_new]{compute_cache.try_emplace(key)};
+ auto& pipeline{pair->second};
+ if (!is_new) {
+ return pipeline.get();
+ }
+ pipeline = CreateComputeProgram(key, shader);
+ return pipeline.get();
+}
+
+std::unique_ptr<GraphicsProgram> ShaderCache::CreateGraphicsProgram() {
+ GraphicsEnvironments environments;
+ GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
+
+ main_pools.ReleaseContents();
+ return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true);
+}
+
+std::unique_ptr<GraphicsProgram> ShaderCache::CreateGraphicsProgram(
+ ShaderPools& pools, const GraphicsProgramKey& key, std::span<Shader::Environment* const> envs,
+ bool build_in_parallel) {
+ LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
+ size_t env_index{0};
+ std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
+ for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ if (key.unique_hashes[index] == 0) {
+ continue;
+ }
+ Shader::Environment& env{*envs[env_index]};
+ ++env_index;
+
+ const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
+ Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset);
+ programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
+ }
+ std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
+
+ OGLProgram gl_program;
+ gl_program.handle = glCreateProgram();
+
+ Shader::Backend::SPIRV::Bindings binding;
+ for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ if (key.unique_hashes[index] == 0) {
+ continue;
+ }
+ UNIMPLEMENTED_IF(index == 0);
+
+ Shader::IR::Program& program{programs[index]};
+ const size_t stage_index{index - 1};
+ infos[stage_index] = &program.info;
+
+ const std::vector<u32> code{EmitSPIRV(profile, program, binding)};
+ FILE* file = fopen("D:\\shader.spv", "wb");
+ fwrite(code.data(), 4, code.size(), file);
+ fclose(file);
+ AddShader(Stage(stage_index), gl_program.handle, code);
+ }
+ LinkProgram(gl_program.handle);
+
+ return std::make_unique<GraphicsProgram>(texture_cache, buffer_cache, gpu_memory, maxwell3d,
+ program_manager, state_tracker, std::move(gl_program),
+ infos);
+}
+
+std::unique_ptr<ComputeProgram> ShaderCache::CreateComputeProgram(
+ const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) {
+ const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+ const auto& qmd{kepler_compute.launch_description};
+ ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+ env.SetCachedSize(shader->size_bytes);
+
+ main_pools.ReleaseContents();
+ return CreateComputeProgram(main_pools, key, env, true);
+}
+
+std::unique_ptr<ComputeProgram> ShaderCache::CreateComputeProgram(ShaderPools& pools,
+ const ComputeProgramKey& key,
+ Shader::Environment& env,
+ bool build_in_parallel) {
+ LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
+
+ Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
+ Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)};
+ Shader::Backend::SPIRV::Bindings binding;
+ const std::vector<u32> code{EmitSPIRV(profile, program, binding)};
+ OGLProgram gl_program;
+ gl_program.handle = glCreateProgram();
+ AddShader(GL_COMPUTE_SHADER, gl_program.handle, code);
+ LinkProgram(gl_program.handle);
+ return std::make_unique<ComputeProgram>(texture_cache, buffer_cache, gpu_memory, kepler_compute,
+ program_manager, std::move(gl_program), program.info);
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 96520e17c..b479d073a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,20 +5,18 @@
#pragma once
#include <array>
-#include <atomic>
-#include <bitset>
-#include <memory>
-#include <string>
-#include <tuple>
#include <unordered_map>
-#include <unordered_set>
-#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/object_pool.h"
#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_compute_program.h"
+#include "video_core/renderer_opengl/gl_graphics_program.h"
#include "video_core/shader_cache.h"
namespace Tegra {
@@ -32,64 +30,62 @@ class EmuWindow;
namespace OpenGL {
class Device;
+class ProgramManager;
class RasterizerOpenGL;
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
-struct GraphicsProgramKey {
- struct TransformFeedbackState {
- struct Layout {
- u32 stream;
- u32 varying_count;
- u32 stride;
- };
- std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts;
- std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings;
- };
-
- std::array<u64, 6> unique_hashes;
- std::array<u8, Maxwell::NumRenderTargets> color_formats;
- union {
- u32 raw;
- BitField<0, 1, u32> xfb_enabled;
- BitField<1, 1, u32> early_z;
- BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
- BitField<6, 2, u32> tessellation_primitive;
- BitField<8, 2, u32> tessellation_spacing;
- BitField<10, 1, u32> tessellation_clockwise;
- };
- u32 padding;
- TransformFeedbackState xfb_state;
-
- [[nodiscard]] size_t Size() const noexcept {
- if (xfb_enabled != 0) {
- return sizeof(GraphicsProgramKey);
- } else {
- return offsetof(GraphicsProgramKey, padding);
- }
+struct ShaderPools {
+ void ReleaseContents() {
+ flow_block.ReleaseContents();
+ block.ReleaseContents();
+ inst.ReleaseContents();
}
-};
-static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>);
-static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>);
-static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>);
-class GraphicsProgram {
-public:
-private:
+ Shader::ObjectPool<Shader::IR::Inst> inst;
+ Shader::ObjectPool<Shader::IR::Block> block;
+ Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
};
class ShaderCache : public VideoCommon::ShaderCache {
public:
explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
- Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_);
+ Tegra::MemoryManager& gpu_memory_, const Device& device_,
+ TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_);
~ShaderCache();
+ [[nodiscard]] GraphicsProgram* CurrentGraphicsProgram();
+
+ [[nodiscard]] ComputeProgram* CurrentComputeProgram();
+
private:
+ std::unique_ptr<GraphicsProgram> CreateGraphicsProgram();
+
+ std::unique_ptr<GraphicsProgram> CreateGraphicsProgram(
+ ShaderPools& pools, const GraphicsProgramKey& key,
+ std::span<Shader::Environment* const> envs, bool build_in_parallel);
+
+ std::unique_ptr<ComputeProgram> CreateComputeProgram(const ComputeProgramKey& key,
+ const VideoCommon::ShaderInfo* shader);
+
+ std::unique_ptr<ComputeProgram> CreateComputeProgram(ShaderPools& pools,
+ const ComputeProgramKey& key,
+ Shader::Environment& env,
+ bool build_in_parallel);
+
Core::Frontend::EmuWindow& emu_window;
- Tegra::GPU& gpu;
const Device& device;
+ TextureCache& texture_cache;
+ BufferCache& buffer_cache;
+ ProgramManager& program_manager;
+ StateTracker& state_tracker;
+
+ GraphicsProgramKey graphics_key{};
+
+ ShaderPools main_pools;
+ std::unordered_map<GraphicsProgramKey, std::unique_ptr<GraphicsProgram>> graphics_cache;
+ std::unordered_map<ComputeProgramKey, std::unique_ptr<ComputeProgram>> compute_cache;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 553e6e8d6..399959afb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -1,149 +1,3 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
-
-namespace OpenGL {
-
-namespace {
-
-void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
- if (current == old) {
- return;
- }
- if (current == 0) {
- if (enabled) {
- enabled = false;
- glDisable(stage);
- }
- return;
- }
- if (!enabled) {
- enabled = true;
- glEnable(stage);
- }
- glBindProgramARB(stage, current);
-}
-
-} // Anonymous namespace
-
-ProgramManager::ProgramManager(const Device& device)
- : use_assembly_programs{device.UseAssemblyShaders()} {
- if (use_assembly_programs) {
- glEnable(GL_COMPUTE_PROGRAM_NV);
- } else {
- graphics_pipeline.Create();
- glBindProgramPipeline(graphics_pipeline.handle);
- }
-}
-
-ProgramManager::~ProgramManager() = default;
-
-void ProgramManager::BindCompute(GLuint program) {
- if (use_assembly_programs) {
- glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
- } else {
- is_graphics_bound = false;
- glUseProgram(program);
- }
-}
-
-void ProgramManager::BindGraphicsPipeline() {
- if (!use_assembly_programs) {
- UpdateSourcePrograms();
- }
-}
-
-void ProgramManager::BindHostPipeline(GLuint pipeline) {
- if (use_assembly_programs) {
- if (geometry_enabled) {
- geometry_enabled = false;
- old_state.geometry = 0;
- glDisable(GL_GEOMETRY_PROGRAM_NV);
- }
- } else {
- if (!is_graphics_bound) {
- glUseProgram(0);
- }
- }
- glBindProgramPipeline(pipeline);
-}
-
-void ProgramManager::RestoreGuestPipeline() {
- if (use_assembly_programs) {
- glBindProgramPipeline(0);
- } else {
- glBindProgramPipeline(graphics_pipeline.handle);
- }
-}
-
-void ProgramManager::BindHostCompute(GLuint program) {
- if (use_assembly_programs) {
- glDisable(GL_COMPUTE_PROGRAM_NV);
- }
- glUseProgram(program);
- is_graphics_bound = false;
-}
-
-void ProgramManager::RestoreGuestCompute() {
- if (use_assembly_programs) {
- glEnable(GL_COMPUTE_PROGRAM_NV);
- glUseProgram(0);
- }
-}
-
-void ProgramManager::UseVertexShader(GLuint program) {
- if (use_assembly_programs) {
- BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
- }
- current_state.vertex = program;
-}
-
-void ProgramManager::UseGeometryShader(GLuint program) {
- if (use_assembly_programs) {
- BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled);
- }
- current_state.geometry = program;
-}
-
-void ProgramManager::UseFragmentShader(GLuint program) {
- if (use_assembly_programs) {
- BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled);
- }
- current_state.fragment = program;
-}
-
-void ProgramManager::UpdateSourcePrograms() {
- if (!is_graphics_bound) {
- is_graphics_bound = true;
- glUseProgram(0);
- }
-
- const GLuint handle = graphics_pipeline.handle;
- const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
- if (current == old) {
- return;
- }
- glUseProgramStages(handle, stage, current);
- };
- update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
- update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
- update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
-
- old_state = current_state;
-}
-
-void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
- const auto& regs = maxwell.regs;
-
- // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
- y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index ad42cce74..70781d6f5 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -4,79 +4,24 @@
#pragma once
-#include <cstddef>
-
#include <glad/glad.h>
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/maxwell_to_gl.h"
-
namespace OpenGL {
-class Device;
-
-/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
-/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
-/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
-/// Not following that rule will cause problems on some AMD drivers.
-struct alignas(16) MaxwellUniformData {
- void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
-
- GLfloat y_direction;
-};
-static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
-static_assert(sizeof(MaxwellUniformData) < 16384,
- "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
-
class ProgramManager {
public:
- explicit ProgramManager(const Device& device);
- ~ProgramManager();
-
- /// Binds a compute program
- void BindCompute(GLuint program);
-
- /// Updates bound programs.
- void BindGraphicsPipeline();
-
- /// Binds an OpenGL pipeline object unsynchronized with the guest state.
- void BindHostPipeline(GLuint pipeline);
+ void BindProgram(GLuint program) {
+ if (bound_program == program) {
+ return;
+ }
+ bound_program = program;
+ glUseProgram(program);
+ }
- /// Rewinds BindHostPipeline state changes.
- void RestoreGuestPipeline();
-
- /// Binds an OpenGL GLSL program object unsynchronized with the guest state.
- void BindHostCompute(GLuint program);
-
- /// Rewinds BindHostCompute state changes.
- void RestoreGuestCompute();
-
- void UseVertexShader(GLuint program);
- void UseGeometryShader(GLuint program);
- void UseFragmentShader(GLuint program);
+ void RestoreGuestCompute() {}
private:
- struct PipelineState {
- GLuint vertex = 0;
- GLuint geometry = 0;
- GLuint fragment = 0;
- };
-
- /// Update GLSL programs.
- void UpdateSourcePrograms();
-
- OGLPipeline graphics_pipeline;
-
- PipelineState current_state;
- PipelineState old_state;
-
- bool use_assembly_programs = false;
-
- bool is_graphics_bound = true;
-
- bool vertex_enabled = false;
- bool geometry_enabled = false;
- bool fragment_enabled = false;
+ GLuint bound_program = 0;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index a8bf84218..7053be161 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -24,9 +24,7 @@
#include "video_core/textures/decoders.h"
namespace OpenGL {
-
namespace {
-
using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TextureMipmapFilter;
using Tegra::Texture::TextureType;
@@ -59,107 +57,6 @@ struct CopyRegion {
GLsizei depth;
};
-struct FormatTuple {
- GLenum internal_format;
- GLenum format = GL_NONE;
- GLenum type = GL_NONE;
-};
-
-constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
- {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
- {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
- {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
- {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
- {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
- {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
- {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
- {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
- {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
- {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
- {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
- {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
- {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
- {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
- {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
- {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
- {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
- {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
- {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
- {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
- {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
- {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
- {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
- {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
- {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
- {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
- {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
- {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
- {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
- {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
- {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
- {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
- {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
- {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
- {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
- {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
- {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
- {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
- {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
- {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
- {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
- {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
- {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
- {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
- {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
- {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
- {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
- {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
- {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
- {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
- {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
- {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
- {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
- {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
- {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
- {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
- {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
- {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
- {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
- {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
- {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
- {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
- {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
- {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
- {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
- {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
- GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
-}};
-
constexpr std::array ACCELERATED_FORMATS{
GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F,
GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI,
@@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{
GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
};
-const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
- ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
- return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
-}
-
GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
switch (info.type) {
case ImageType::e1D:
@@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
return GL_NONE;
}
-GLenum ImageTarget(ImageViewType type, int num_samples = 1) {
+GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
const bool is_multisampled = num_samples > 1;
switch (type) {
- case ImageViewType::e1D:
+ case Shader::TextureType::Color1D:
return GL_TEXTURE_1D;
- case ImageViewType::e2D:
+ case Shader::TextureType::Color2D:
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
- case ImageViewType::Cube:
+ case Shader::TextureType::ColorCube:
return GL_TEXTURE_CUBE_MAP;
- case ImageViewType::e3D:
+ case Shader::TextureType::Color3D:
return GL_TEXTURE_3D;
- case ImageViewType::e1DArray:
+ case Shader::TextureType::ColorArray1D:
return GL_TEXTURE_1D_ARRAY;
- case ImageViewType::e2DArray:
+ case Shader::TextureType::ColorArray2D:
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
- case ImageViewType::CubeArray:
+ case Shader::TextureType::ColorArrayCube:
return GL_TEXTURE_CUBE_MAP_ARRAY;
- case ImageViewType::Rect:
- return GL_TEXTURE_RECTANGLE;
- case ImageViewType::Buffer:
+ case Shader::TextureType::Buffer:
return GL_TEXTURE_BUFFER;
}
UNREACHABLE_MSG("Invalid image view type={}", type);
@@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
default:
return false;
}
- const GLenum internal_format = GetFormatTuple(info.format).internal_format;
+ const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format;
const auto& format_info = runtime.FormatInfo(info.type, internal_format);
if (format_info.is_compressed) {
return false;
@@ -414,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
- const GLuint texture = image_view->DefaultHandle();
- glNamedFramebufferTexture(fbo, attachment, texture, 0);
+ glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0);
return;
}
- const GLuint texture = image_view->Handle(ImageViewType::e3D);
+ const GLuint texture = image_view->Handle(Shader::TextureType::Color3D);
if (image_view->range.extent.layers > 1) {
// TODO: OpenGL doesn't support rendering to a fixed number of slices
glNamedFramebufferTexture(fbo, attachment, texture, 0);
@@ -453,7 +342,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i];
- for (const FormatTuple& tuple : FORMAT_TABLE) {
+ for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) {
const GLenum format = tuple.internal_format;
GLint compat_class;
GLint compat_type;
@@ -475,11 +364,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
null_image_3d.Create(GL_TEXTURE_3D);
- null_image_rect.Create(GL_TEXTURE_RECTANGLE);
glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
- glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
std::array<GLuint, 4> new_handles;
glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
@@ -496,29 +383,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
GL_R8, 0, 1, 0, 6);
const std::array texture_handles{
- null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
- null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle,
- null_image_view_2d_array.handle, null_image_view_cube.handle,
+ null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
+ null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle,
+ null_image_view_cube.handle,
};
for (const GLuint handle : texture_handles) {
static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
}
- const auto set_view = [this](ImageViewType type, GLuint handle) {
+ const auto set_view = [this](Shader::TextureType type, GLuint handle) {
if (device.HasDebuggingToolAttached()) {
const std::string name = fmt::format("NullImage {}", type);
glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
}
null_image_views[static_cast<size_t>(type)] = handle;
};
- set_view(ImageViewType::e1D, null_image_view_1d.handle);
- set_view(ImageViewType::e2D, null_image_view_2d.handle);
- set_view(ImageViewType::Cube, null_image_view_cube.handle);
- set_view(ImageViewType::e3D, null_image_3d.handle);
- set_view(ImageViewType::e1DArray, null_image_1d_array.handle);
- set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle);
- set_view(ImageViewType::CubeArray, null_image_cube_array.handle);
- set_view(ImageViewType::Rect, null_image_rect.handle);
+ set_view(Shader::TextureType::Color1D, null_image_view_1d.handle);
+ set_view(Shader::TextureType::Color2D, null_image_view_2d.handle);
+ set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle);
+ set_view(Shader::TextureType::Color3D, null_image_3d.handle);
+ set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
+ set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
+ set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
}
TextureCacheRuntime::~TextureCacheRuntime() = default;
@@ -710,7 +596,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
gl_format = GL_RGBA;
gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
} else {
- const auto& tuple = GetFormatTuple(info.format);
+ const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
gl_internal_format = tuple.internal_format;
gl_format = tuple.format;
gl_type = tuple.type;
@@ -750,8 +636,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
break;
case GL_TEXTURE_BUFFER:
- buffer.Create();
- glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
+ UNREACHABLE();
break;
default:
UNREACHABLE_MSG("Invalid target=0x{:x}", target);
@@ -789,14 +674,6 @@ void Image::UploadMemory(const ImageBufferMap& map,
}
}
-void Image::UploadMemory(const ImageBufferMap& map,
- std::span<const VideoCommon::BufferCopy> copies) {
- for (const VideoCommon::BufferCopy& copy : copies) {
- glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
- copy.dst_offset, copy.size);
- }
-}
-
void Image::DownloadMemory(ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
@@ -958,7 +835,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
if (True(image.flags & ImageFlagBits::Converted)) {
internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
} else {
- internal_format = GetFormatTuple(format).internal_format;
+ internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
}
VideoCommon::SubresourceRange flatten_range = info.range;
std::array<GLuint, 2> handles;
@@ -970,8 +847,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
[[fallthrough]];
case ImageViewType::e1D:
glGenTextures(2, handles.data());
- SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range);
- SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
+ SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range);
+ SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range);
break;
case ImageViewType::e2DArray:
flatten_range.extent.layers = 1;
@@ -985,62 +862,84 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
.extent = {.levels = 1, .layers = 1},
};
glGenTextures(1, handles.data());
- SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range);
- break;
+ SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range);
+ } else {
+ glGenTextures(2, handles.data());
+ SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range);
+ SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info,
+ info.range);
}
- glGenTextures(2, handles.data());
- SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
- SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
break;
case ImageViewType::e3D:
glGenTextures(1, handles.data());
- SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
+ SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range);
break;
case ImageViewType::CubeArray:
flatten_range.extent.layers = 6;
[[fallthrough]];
case ImageViewType::Cube:
glGenTextures(2, handles.data());
- SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range);
- SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
+ SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range);
+ SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range);
break;
case ImageViewType::Rect:
- glGenTextures(1, handles.data());
- SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
+ UNIMPLEMENTED();
break;
case ImageViewType::Buffer:
- glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data());
- SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range);
+ UNREACHABLE();
+ break;
+ }
+ switch (info.type) {
+ case ImageViewType::e1D:
+ default_handle = Handle(Shader::TextureType::Color1D);
+ break;
+ case ImageViewType::e1DArray:
+ default_handle = Handle(Shader::TextureType::ColorArray1D);
+ break;
+ case ImageViewType::e2D:
+ default_handle = Handle(Shader::TextureType::Color2D);
+ break;
+ case ImageViewType::e2DArray:
+ default_handle = Handle(Shader::TextureType::ColorArray2D);
+ break;
+ case ImageViewType::e3D:
+ default_handle = Handle(Shader::TextureType::Color3D);
+ break;
+ case ImageViewType::Cube:
+ default_handle = Handle(Shader::TextureType::ColorCube);
+ break;
+ case ImageViewType::CubeArray:
+ default_handle = Handle(Shader::TextureType::ColorArrayCube);
+ break;
+ default:
break;
}
- default_handle = Handle(info.type);
}
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
+ const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
+ : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
+ buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
+
+ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info)
: VideoCommon::ImageViewBase{info, view_info} {}
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
: VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
-void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type,
+void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type,
GLuint handle, const VideoCommon::ImageViewInfo& info,
VideoCommon::SubresourceRange view_range) {
- if (info.type == ImageViewType::Buffer) {
- // TODO: Take offset from buffer cache
- glTextureBufferRange(handle, internal_format, image.buffer.handle, 0,
- image.guest_size_bytes);
- } else {
- const GLuint parent = image.texture.handle;
- const GLenum target = ImageTarget(view_type, image.info.num_samples);
- glTextureView(handle, target, parent, internal_format, view_range.base.level,
- view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
- if (!info.IsRenderTarget()) {
- ApplySwizzle(handle, format, info.Swizzle());
- }
+ const GLuint parent = image.texture.handle;
+ const GLenum target = ImageTarget(view_type, image.info.num_samples);
+ glTextureView(handle, target, parent, internal_format, view_range.base.level,
+ view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
+ if (!info.IsRenderTarget()) {
+ ApplySwizzle(handle, format, info.Swizzle());
}
if (device.HasDebuggingToolAttached()) {
- const std::string name = VideoCommon::Name(*this, view_type);
+ const std::string name = VideoCommon::Name(*this);
glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
}
stored_views.emplace_back().handle = handle;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 817b0e650..2e3e02b79 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -9,6 +9,7 @@
#include <glad/glad.h>
+#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/texture_cache.h"
@@ -127,13 +128,12 @@ private:
OGLTexture null_image_1d_array;
OGLTexture null_image_cube_array;
OGLTexture null_image_3d;
- OGLTexture null_image_rect;
OGLTextureView null_image_view_1d;
OGLTextureView null_image_view_2d;
OGLTextureView null_image_view_2d_array;
OGLTextureView null_image_view_cube;
- std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
+ std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
};
class Image : public VideoCommon::ImageBase {
@@ -154,8 +154,6 @@ public:
void UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
- void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
-
void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
GLuint StorageHandle() noexcept;
@@ -170,7 +168,6 @@ private:
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
OGLTexture texture;
- OGLBuffer buffer;
OGLTextureView store_view;
GLenum gl_internal_format = GL_NONE;
GLenum gl_format = GL_NONE;
@@ -182,12 +179,14 @@ class ImageView : public VideoCommon::ImageViewBase {
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
+ const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
- [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
- return views[static_cast<size_t>(query_type)];
+ [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
+ return views[static_cast<size_t>(handle_type)];
}
[[nodiscard]] GLuint DefaultHandle() const noexcept {
@@ -198,15 +197,25 @@ public:
return internal_format;
}
+ [[nodiscard]] GPUVAddr GpuAddr() const noexcept {
+ return gpu_addr;
+ }
+
+ [[nodiscard]] u32 BufferSize() const noexcept {
+ return buffer_size;
+ }
+
private:
- void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
+ void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle,
const VideoCommon::ImageViewInfo& info,
VideoCommon::SubresourceRange view_range);
- std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
+ std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{};
std::vector<OGLTextureView> stored_views;
- GLuint default_handle = 0;
GLenum internal_format = GL_NONE;
+ GLuint default_handle = 0;
+ GPUVAddr gpu_addr = 0;
+ u32 buffer_size = 0;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index f7ad8f370..672f94bfc 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -5,12 +5,120 @@
#pragma once
#include <glad/glad.h>
+
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/surface.h"
namespace OpenGL::MaxwellToGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+struct FormatTuple {
+ GLenum internal_format;
+ GLenum format = GL_NONE;
+ GLenum type = GL_NONE;
+};
+
+constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
+ {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
+ {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
+ {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
+ {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
+ {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
+ {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
+ {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
+ {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
+ {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
+ {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
+ {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
+ {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
+ {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
+ {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
+ {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
+ {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
+ {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
+ {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
+ {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
+ {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
+ {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
+ {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
+ {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
+ {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
+ {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
+ {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
+ {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
+ {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
+ {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
+ {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
+ {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
+ {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
+ {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
+ {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
+ {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
+ {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
+ {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
+ {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
+ {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
+ {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
+ {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
+ {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
+ {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
+ {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
+ {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
+ {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
+ {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
+ {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
+ {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
+ {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
+ {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
+ {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
+ {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
+ {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
+ {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
+ {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
+ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
+ {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
+ {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
+ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
+ {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
+ GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
+}};
+
+inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) {
+ ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
+ return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
+}
+
inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
switch (attrib.type) {
case Maxwell::VertexAttribute::Type::UnsignedNorm:
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index c12929de6..4e77ef808 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -130,7 +130,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
: RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu},
- program_manager{device},
rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) {
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
@@ -236,12 +235,7 @@ void RendererOpenGL::InitOpenGLObjects() {
OGLShader fragment_shader;
fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
- vertex_program.Create(true, false, vertex_shader.handle);
- fragment_program.Create(true, false, fragment_shader.handle);
-
- pipeline.Create();
- glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
- glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
+ present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle);
// Generate presentation sampler
present_sampler.Create();
@@ -342,8 +336,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
// Set projection matrix
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
- glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE,
- std::data(ortho_matrix));
+ program_manager.BindProgram(present_program.handle);
+ glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data());
const auto& texcoords = screen_info.display_texcoords;
auto left = texcoords.left;
@@ -404,8 +398,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
state_tracker.NotifyClipControl();
state_tracker.NotifyAlphaTest();
- program_manager.BindHostPipeline(pipeline.handle);
-
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
@@ -453,7 +445,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
- program_manager.RestoreGuestPipeline();
+ // TODO
+ // program_manager.RestoreGuestPipeline();
}
void RendererOpenGL::RenderScreenshot() {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 0b66f8332..b3ee55665 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -12,7 +12,6 @@
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
namespace Core {
@@ -111,9 +110,7 @@ private:
// OpenGL object IDs
OGLSampler present_sampler;
OGLBuffer vertex_buffer;
- OGLProgram vertex_program;
- OGLProgram fragment_program;
- OGLPipeline pipeline;
+ OGLProgram present_program;
OGLFramebuffer screenshot_framebuffer;
// GPU address of the vertex buffer
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 8fb5be393..51e72b705 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -16,7 +16,6 @@
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
@@ -86,7 +85,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
.width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
.height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
};
- program_manager.BindHostCompute(astc_decoder_program.handle);
+ program_manager.BindProgram(astc_decoder_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
@@ -134,7 +133,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
- program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
+ program_manager.BindProgram(block_linear_unswizzle_2d_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
@@ -173,7 +172,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
- program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
+ program_manager.BindProgram(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
@@ -222,7 +221,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
"Non-power of two images are not implemented");
- program_manager.BindHostCompute(pitch_unswizzle_program.handle);
+ program_manager.BindProgram(pitch_unswizzle_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
@@ -250,7 +249,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
static constexpr GLuint LOC_SRC_OFFSET = 0;
static constexpr GLuint LOC_DST_OFFSET = 1;
- program_manager.BindHostCompute(copy_bc4_program.handle);
+ program_manager.BindProgram(copy_bc4_program.handle);
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_subresource.base_layer == 0);
@@ -286,7 +285,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
break;
case 4: {
// BGRA8 copy
- program_manager.BindHostCompute(copy_bgra_program.handle);
+ program_manager.BindProgram(copy_bgra_program.handle);
constexpr GLenum FORMAT = GL_RGBA8;
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_offset == zero_offset);
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index dd7d2cc0c..c6e5e059b 100644
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -19,23 +19,6 @@
namespace Vulkan {
-struct TextureHandle {
- explicit TextureHandle(u32 data, bool via_header_index) {
- [[likely]] if (via_header_index) {
- image = data;
- sampler = data;
- }
- else {
- const Tegra::Texture::TextureHandle handle{data};
- image = handle.tic_id;
- sampler = via_header_index ? image : handle.tsc_id.Value();
- }
- }
-
- u32 image;
- u32 sampler;
-};
-
class DescriptorLayoutBuilder {
public:
DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index c52001b5a..c27402ff0 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -140,8 +140,8 @@ struct BufferCacheParams {
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
- static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = false;
static constexpr bool USE_MEMORY_MAPS = true;
+ static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index feaace0c5..168ffa7e9 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -18,6 +18,9 @@
namespace Vulkan {
+using Shader::ImageBufferDescriptor;
+using Tegra::Texture::TexturePair;
+
ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue_,
Common::ThreadWorker* thread_worker, const Shader::Info& info_,
@@ -106,25 +109,25 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
secondary_offset};
const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
- return TextureHandle{lhs_raw | rhs_raw, via_header_index};
+ return TexturePair(lhs_raw | rhs_raw, via_header_index);
}
}
- return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index};
+ return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc) {
for (u32 index = 0; index < desc.count; ++index) {
- const TextureHandle handle{read_handle(desc, index)};
- image_view_indices.push_back(handle.image);
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
}
}};
std::ranges::for_each(info.texture_buffer_descriptors, add_image);
std::ranges::for_each(info.image_buffer_descriptors, add_image);
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
- const TextureHandle handle{read_handle(desc, index)};
- image_view_indices.push_back(handle.image);
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
- Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
+ Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
samplers.push_back(sampler->Handle());
}
}
@@ -137,15 +140,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
ImageId* texture_buffer_ids{image_view_ids.data()};
size_t index{};
const auto add_buffer{[&](const auto& desc) {
+ constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
for (u32 i = 0; i < desc.count; ++i) {
bool is_written{false};
- if constexpr (std::is_same_v<decltype(desc), const Shader::ImageBufferDescriptor&>) {
+ if constexpr (is_image) {
is_written = desc.is_written;
}
ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids);
buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
- is_written);
+ is_written, is_image);
++texture_buffer_ids;
++index;
}
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 9f5d30fe8..e5f54a84f 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -19,7 +19,7 @@
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_device.h"
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && defined(NDEBUG)
#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
#else
#define LAMBDA_FORCEINLINE
@@ -30,6 +30,7 @@ namespace {
using boost::container::small_vector;
using boost::container::static_vector;
using Shader::ImageBufferDescriptor;
+using Tegra::Texture::TexturePair;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
@@ -289,15 +290,15 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
const u32 raw{lhs_raw | rhs_raw};
- return TextureHandle{raw, via_header_index};
+ return TexturePair(raw, via_header_index);
}
}
- return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index};
+ return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc) {
for (u32 index = 0; index < desc.count; ++index) {
- const TextureHandle handle{read_handle(desc, index)};
- image_view_indices[image_index++] = handle.image;
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_index++] = handle.first;
}
}};
if constexpr (Spec::has_texture_buffers) {
@@ -312,10 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
- const TextureHandle handle{read_handle(desc, index)};
- image_view_indices[image_index++] = handle.image;
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_index++] = handle.first;
- Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)};
+ Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
samplers[sampler_index++] = sampler->Handle();
}
}
@@ -347,15 +348,16 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
size_t index{};
const auto add_buffer{[&](const auto& desc) {
+ constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
for (u32 i = 0; i < desc.count; ++i) {
bool is_written{false};
- if constexpr (std::is_same_v<decltype(desc), const ImageBufferDescriptor&>) {
+ if constexpr (is_image) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
- is_written);
+ is_written, is_image);
++index;
++texture_buffer_index;
}
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 1334882b5..30b71bdbc 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -342,28 +342,15 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
}
std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
- main_pools.ReleaseContents();
-
- std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> graphics_envs;
- boost::container::static_vector<Shader::Environment*, Maxwell::MaxShaderProgram> envs;
+ GraphicsEnvironments environments;
+ GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
- const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
- for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- if (graphics_key.unique_hashes[index] == 0) {
- continue;
- }
- const auto program{static_cast<Maxwell::ShaderProgram>(index)};
- auto& env{graphics_envs[index]};
- const u32 start_address{maxwell3d.regs.shader_config[index].offset};
- env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
- env.SetCachedSize(shader_infos[index]->size_bytes);
- envs.push_back(&env);
- }
- auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)};
+ main_pools.ReleaseContents();
+ auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)};
if (pipeline_cache_filename.empty()) {
return pipeline;
}
- serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] {
+ serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] {
boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram>
env_ptrs;
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 0f15ad2f7..ef14e91e7 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -96,17 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
return scissor;
}
-struct TextureHandle {
- constexpr TextureHandle(u32 data, bool via_header_index) {
- const Tegra::Texture::TextureHandle handle{data};
- image = handle.tic_id;
- sampler = via_header_index ? image : handle.tsc_id.Value();
- }
-
- u32 image;
- u32 sampler;
-};
-
DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
bool is_indexed) {
DrawParams params{
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index b8b8eace5..78bf90c48 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -91,6 +91,23 @@ const ShaderInfo* ShaderCache::ComputeShader() {
return MakeShaderInfo(env, *cpu_shader_addr);
}
+void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
+ const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
+ size_t env_index{};
+ const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+ for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
+ if (unique_hashes[index] == 0) {
+ continue;
+ }
+ const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
+ auto& env{result.envs[index]};
+ const u32 start_address{maxwell3d.regs.shader_config[index].offset};
+ env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
+ env.SetCachedSize(shader_infos[index]->size_bytes);
+ result.env_ptrs[env_index++] = &env;
+ }
+}
+
ShaderInfo* ShaderCache::TryGet(VAddr addr) const {
std::scoped_lock lock{lookup_mutex};
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index 89a4bcc84..136fe294c 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -4,14 +4,18 @@
#pragma once
+#include <algorithm>
+#include <array>
#include <memory>
#include <mutex>
+#include <span>
#include <unordered_map>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/shader_environment.h"
namespace Tegra {
class MemoryManager;
@@ -30,6 +34,8 @@ class ShaderCache {
static constexpr u64 PAGE_BITS = 14;
static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;
+ static constexpr size_t NUM_PROGRAMS = 6;
+
struct Entry {
VAddr addr_start;
VAddr addr_end;
@@ -58,6 +64,15 @@ public:
void SyncGuestHost();
protected:
+ struct GraphicsEnvironments {
+ std::array<GraphicsEnvironment, NUM_PROGRAMS> envs;
+ std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs;
+
+ std::span<Shader::Environment* const> Span() const noexcept {
+ return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr));
+ }
+ };
+
explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_);
@@ -65,17 +80,21 @@ protected:
/// @brief Update the hashes and information of shader stages
/// @param unique_hashes Shader hashes to store into when a stage is enabled
/// @return True no success, false on error
- bool RefreshStages(std::array<u64, 6>& unique_hashes);
+ bool RefreshStages(std::array<u64, NUM_PROGRAMS>& unique_hashes);
/// @brief Returns information about the current compute shader
/// @return Pointer to a valid shader, nullptr on error
const ShaderInfo* ComputeShader();
+ /// @brief Collect the current graphics environments
+ void GetGraphicsEnvironments(GraphicsEnvironments& result,
+ const std::array<u64, NUM_PROGRAMS>& unique_hashes);
+
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
- std::array<const ShaderInfo*, 6> shader_infos{};
+ std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
bool last_shaders_valid = false;
private:
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 5dccc0097..c93174519 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -187,8 +187,8 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit,
bool via_header_index, u32 raw) {
- const TextureHandle handle{raw, via_header_index};
- const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)};
+ const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
+ const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
Tegra::Texture::TICEntry entry;
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
const Shader::TextureType result{ConvertType(entry)};
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index 37d712045..d26dbfaab 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
@@ -29,22 +29,6 @@ class Memorymanager;
namespace VideoCommon {
-struct TextureHandle {
- explicit TextureHandle(u32 data, bool via_header_index) {
- if (via_header_index) {
- image = data;
- sampler = data;
- } else {
- const Tegra::Texture::TextureHandle handle{data};
- image = handle.tic_id;
- sampler = via_header_index ? image : handle.tsc_id.Value();
- }
- }
-
- u32 image;
- u32 sampler;
-};
-
class GenericEnvironment : public Shader::Environment {
public:
explicit GenericEnvironment() = default;
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
index d10ba4ccd..249cc4d0f 100644
--- a/src/video_core/texture_cache/formatter.cpp
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) {
return "Invalid";
}
-std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) {
+std::string Name(const ImageViewBase& image_view) {
const u32 width = image_view.size.width;
const u32 height = image_view.size.height;
const u32 depth = image_view.size.depth;
@@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> t
const u32 num_layers = image_view.range.extent.layers;
const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
- switch (type.value_or(image_view.type)) {
+ switch (image_view.type) {
case ImageViewType::e1D:
return fmt::format("ImageView 1D {}{}", width, level);
case ImageViewType::e2D:
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index a48413983..c6cf0583f 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -255,8 +255,7 @@ struct RenderTargets;
[[nodiscard]] std::string Name(const ImageBase& image);
-[[nodiscard]] std::string Name(const ImageViewBase& image_view,
- std::optional<ImageViewType> type = std::nullopt);
+[[nodiscard]] std::string Name(const ImageViewBase& image_view);
[[nodiscard]] std::string Name(const RenderTargets& render_targets);
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index c1d14335e..1a9399455 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -154,6 +154,15 @@ union TextureHandle {
};
static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
+[[nodiscard]] inline std::pair<u32, u32> TexturePair(u32 raw, bool via_header_index) {
+ if (via_header_index) {
+ return {raw, raw};
+ } else {
+ const Tegra::Texture::TextureHandle handle{raw};
+ return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id};
+ }
+}
+
struct TICEntry {
union {
struct {
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 2318c1bda..e27a2b51e 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -282,7 +282,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
.pNext = nullptr,
- .storageBuffer16BitAccess = false,
+ .storageBuffer16BitAccess = true,
.uniformAndStorageBuffer16BitAccess = true,
.storagePushConstant16 = false,
.storageInputOutput16 = false,