summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h8
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp64
-rw-r--r--src/video_core/renderer_opengl/gl_device.h13
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.h4
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.cpp85
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.h68
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp504
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h63
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_sampler_cache.cpp52
-rw-r--r--src/video_core/renderer_opengl/gl_sampler_cache.h25
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h6
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h15
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp32
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h19
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp1454
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h286
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h13
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp49
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h1
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp224
-rw-r--r--src/video_core/renderer_opengl/util_shaders.h51
-rw-r--r--src/video_core/renderer_opengl/utils.cpp42
-rw-r--r--src/video_core/renderer_opengl/utils.h16
31 files changed, 1815 insertions, 1325 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 60735d502..5772cad87 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -61,10 +61,9 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
- const Device& device_, std::size_t stream_size_)
- : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_,
- std::make_unique<OGLStreamBuffer>(device_, stream_size_, true)},
- device{device_} {
+ const Device& device_, OGLStreamBuffer& stream_buffer_,
+ StateTracker& state_tracker)
+ : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
if (!device.HasFastBufferSubData()) {
return;
}
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 95251e26b..17ee90316 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -22,6 +22,7 @@ namespace OpenGL {
class Device;
class OGLStreamBuffer;
class RasterizerOpenGL;
+class StateTracker;
class Buffer : public VideoCommon::BufferBlock {
public:
@@ -52,9 +53,10 @@ private:
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
class OGLBufferCache final : public GenericBufferCache {
public:
- explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
- const Device& device_, std::size_t stream_size_);
+ explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
+ const Device& device, OGLStreamBuffer& stream_buffer,
+ StateTracker& state_tracker);
~OGLBufferCache();
BufferInfo GetEmptyBuffer(std::size_t) override;
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index a94e4f72e..b24179d59 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -5,9 +5,11 @@
#include <algorithm>
#include <array>
#include <cstddef>
+#include <cstdlib>
#include <cstring>
#include <limits>
#include <optional>
+#include <span>
#include <vector>
#include <glad/glad.h>
@@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1;
constexpr u32 NumStages = 5;
-constexpr std::array LimitUBOs = {
+constexpr std::array LIMIT_UBOS = {
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
- GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS};
-
-constexpr std::array LimitSSBOs = {
+ GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
+};
+constexpr std::array LIMIT_SSBOS = {
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
- GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS};
-
-constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
- GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
- GL_MAX_TEXTURE_IMAGE_UNITS,
- GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS};
-
-constexpr std::array LimitImages = {
+ GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
+};
+constexpr std::array LIMIT_SAMPLERS = {
+ GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
+ GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TEXTURE_IMAGE_UNITS,
+ GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
+};
+constexpr std::array LIMIT_IMAGES = {
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
- GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS};
+ GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
+};
template <typename T>
T GetInteger(GLenum pname) {
@@ -76,8 +80,8 @@ std::vector<std::string_view> GetExtensions() {
return extensions;
}
-bool HasExtension(const std::vector<std::string_view>& images, std::string_view extension) {
- return std::find(images.begin(), images.end(), extension) != images.end();
+bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
+ return std::ranges::find(extensions, extension) != extensions.end();
}
u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
@@ -91,8 +95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
- std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
- [](GLenum pname) { return GetInteger<u32>(pname); });
+ std::ranges::transform(LIMIT_UBOS, max.begin(),
+ [](GLenum pname) { return GetInteger<u32>(pname); });
return max;
}
@@ -115,9 +119,10 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
for (std::size_t i = 0; i < NumStages; ++i) {
const std::size_t stage = stage_swizzle[i];
bindings[stage] = {
- Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]),
- Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]),
- Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])};
+ Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
+ Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
+ Extract(base_samplers, num_samplers, total_samplers / NumStages,
+ LIMIT_SAMPLERS[stage])};
}
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
@@ -130,7 +135,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
// Reserve at least 4 image bindings on the fragment stage.
bindings[4].image =
- Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
+ Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
// This is guaranteed to be at least 1.
const u32 total_extracted_images = num_images / (NumStages - 1);
@@ -142,7 +147,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
continue;
}
bindings[stage].image =
- Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
+ Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
}
// Compute doesn't care about any of this.
@@ -188,6 +193,11 @@ bool IsASTCSupported() {
return true;
}
+[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
+ const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
+ return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
+}
+
} // Anonymous namespace
Device::Device()
@@ -206,9 +216,8 @@ Device::Device()
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}
-
- uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
- shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
+ uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
+ shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
@@ -224,6 +233,7 @@ Device::Device()
has_precise_bug = TestPreciseBug();
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
+ has_debugging_tool_attached = IsDebugToolAttached(extensions);
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 8a4b6b9fc..13e66846c 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -36,11 +36,11 @@ public:
return GetBaseBindings(static_cast<std::size_t>(shader_type));
}
- std::size_t GetUniformBufferAlignment() const {
+ size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
- std::size_t GetShaderStorageBufferAlignment() const {
+ size_t GetShaderStorageBufferAlignment() const {
return shader_storage_alignment;
}
@@ -104,6 +104,10 @@ public:
return has_nv_viewport_array2;
}
+ bool HasDebuggingToolAttached() const {
+ return has_debugging_tool_attached;
+ }
+
bool UseAssemblyShaders() const {
return use_assembly_shaders;
}
@@ -118,8 +122,8 @@ private:
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
- std::size_t uniform_buffer_alignment{};
- std::size_t shader_storage_alignment{};
+ size_t uniform_buffer_alignment{};
+ size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
u32 max_compute_shared_memory_size{};
@@ -135,6 +139,7 @@ private:
bool has_precise_bug{};
bool has_fast_buffer_sub_data{};
bool has_nv_viewport_array2{};
+ bool has_debugging_tool_attached{};
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
};
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 6040646cb..3e9c922f5 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -46,7 +46,7 @@ void GLInnerFence::Wait() {
}
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::GPU& gpu_, TextureCacheOpenGL& texture_cache_,
+ Tegra::GPU& gpu_, TextureCache& texture_cache_,
OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index 39ca6125b..30dbee613 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -33,12 +33,12 @@ private:
using Fence = std::shared_ptr<GLInnerFence>;
using GenericFenceManager =
- VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
+ VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
- TextureCacheOpenGL& texture_cache_, OGLBufferCache& buffer_cache_,
+ TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
QueryCache& query_cache_);
protected:
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
deleted file mode 100644
index b8a512cb6..000000000
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <tuple>
-#include <unordered_map>
-#include <utility>
-
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
-
-namespace OpenGL {
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using VideoCore::Surface::SurfaceType;
-
-FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;
-
-FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default;
-
-GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) {
- const auto [entry, is_cache_miss] = cache.try_emplace(key);
- auto& framebuffer{entry->second};
- if (is_cache_miss) {
- framebuffer = CreateFramebuffer(key);
- }
- return framebuffer.handle;
-}
-
-OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) {
- OGLFramebuffer framebuffer;
- framebuffer.Create();
-
- // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
-
- if (key.zeta) {
- const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
- const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
- key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
- }
-
- std::size_t num_buffers = 0;
- std::array<GLenum, Maxwell::NumRenderTargets> targets;
-
- for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
- if (!key.colors[index]) {
- targets[index] = GL_NONE;
- continue;
- }
- const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index);
- key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
-
- const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111;
- targets[index] = GL_COLOR_ATTACHMENT0 + attachment;
- num_buffers = index + 1;
- }
-
- if (num_buffers > 0) {
- glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets));
- } else {
- glDrawBuffer(GL_NONE);
- }
-
- return framebuffer;
-}
-
-std::size_t FramebufferCacheKey::Hash() const noexcept {
- std::size_t hash = std::hash<View>{}(zeta);
- for (const auto& color : colors) {
- hash ^= std::hash<View>{}(color);
- }
- hash ^= static_cast<std::size_t>(color_attachments) << 16;
- return hash;
-}
-
-bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept {
- return std::tie(colors, zeta, color_attachments) ==
- std::tie(rhs.colors, rhs.zeta, rhs.color_attachments);
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
deleted file mode 100644
index 8f698fee0..000000000
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include <unordered_map>
-
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_texture_cache.h"
-
-namespace OpenGL {
-
-constexpr std::size_t BitsPerAttachment = 4;
-
-struct FramebufferCacheKey {
- View zeta;
- std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
- u32 color_attachments = 0;
-
- std::size_t Hash() const noexcept;
-
- bool operator==(const FramebufferCacheKey& rhs) const noexcept;
-
- bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
- return !operator==(rhs);
- }
-
- void SetAttachment(std::size_t index, u32 attachment) {
- color_attachments |= attachment << (BitsPerAttachment * index);
- }
-};
-
-} // namespace OpenGL
-
-namespace std {
-
-template <>
-struct hash<OpenGL::FramebufferCacheKey> {
- std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept {
- return k.Hash();
- }
-};
-
-} // namespace std
-
-namespace OpenGL {
-
-class FramebufferCacheOpenGL {
-public:
- FramebufferCacheOpenGL();
- ~FramebufferCacheOpenGL();
-
- GLuint GetFramebuffer(const FramebufferCacheKey& key);
-
-private:
- OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
-
- std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
-};
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e58e84759..8aa63d329 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -25,12 +25,15 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/shader_cache.h"
+#include "video_core/texture_cache/texture_cache.h"
namespace OpenGL {
@@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
namespace {
-constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
-constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
+constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
+constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
-constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
+constexpr size_t TOTAL_CONST_BUFFER_BYTES =
NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
-constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
-constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
+constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
+constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
+
+constexpr size_t MAX_TEXTURES = 192;
+constexpr size_t MAX_IMAGES = 48;
+
+struct TextureHandle {
+ constexpr TextureHandle(u32 data, bool via_header_index) {
+ const Tegra::Texture::TextureHandle handle{data};
+ image = handle.tic_id;
+ sampler = via_header_index ? image : handle.tsc_id.Value();
+ }
+
+ u32 image;
+ u32 sampler;
+};
template <typename Engine, typename Entry>
-Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
- ShaderType shader_type, std::size_t index = 0) {
+TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
+ ShaderType shader_type, size_t index = 0) {
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
if (entry.is_separated) {
const u32 buffer_1 = entry.buffer;
@@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
const u32 offset_2 = entry.secondary_offset;
const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
- return engine.GetTextureInfo(handle_1 | handle_2);
+ return TextureHandle(handle_1 | handle_2, via_header_index);
}
}
if (entry.is_bindless) {
- const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
- return engine.GetTextureInfo(handle);
- }
-
- const auto& gpu_profile = engine.AccessGuestDriverProfile();
- const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
- if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
- return engine.GetStageTexture(shader_type, offset);
- } else {
- return engine.GetTexture(offset);
+ const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
+ return TextureHandle(raw, via_header_index);
}
+ const u32 buffer = engine.GetBoundBuffer();
+ const u64 offset = (entry.offset + index) * sizeof(u32);
+ return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
}
std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
@@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
if (!entry.IsIndirect()) {
return entry.GetSize();
}
-
if (buffer.size > Maxwell::MaxConstBufferSize) {
LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
Maxwell::MaxConstBufferSize);
@@ -147,23 +158,60 @@ void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ss
reinterpret_cast<const GLuint*>(ssbos));
}
+ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
+ if (entry.is_buffer) {
+ return ImageViewType::Buffer;
+ }
+ switch (entry.type) {
+ case Tegra::Shader::TextureType::Texture1D:
+ return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
+ case Tegra::Shader::TextureType::Texture2D:
+ return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
+ case Tegra::Shader::TextureType::Texture3D:
+ return ImageViewType::e3D;
+ case Tegra::Shader::TextureType::TextureCube:
+ return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
+ }
+ UNREACHABLE();
+ return ImageViewType::e2D;
+}
+
+ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
+ switch (entry.type) {
+ case Tegra::Shader::ImageType::Texture1D:
+ return ImageViewType::e1D;
+ case Tegra::Shader::ImageType::Texture1DArray:
+ return ImageViewType::e1DArray;
+ case Tegra::Shader::ImageType::Texture2D:
+ return ImageViewType::e2D;
+ case Tegra::Shader::ImageType::Texture2DArray:
+ return ImageViewType::e2DArray;
+ case Tegra::Shader::ImageType::Texture3D:
+ return ImageViewType::e3D;
+ case Tegra::Shader::ImageType::TextureBuffer:
+ return ImageViewType::Buffer;
+ }
+ UNREACHABLE();
+ return ImageViewType::e2D;
+}
+
} // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
ScreenInfo& screen_info_, ProgramManager& program_manager_,
StateTracker& state_tracker_)
- : RasterizerAccelerated{cpu_memory_}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
+ : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
- texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker),
+ stream_buffer(device, state_tracker),
+ texture_cache_runtime(device, program_manager, state_tracker),
+ texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
query_cache(*this, maxwell3d, gpu_memory),
- buffer_cache(*this, gpu_memory, cpu_memory_, device, STREAM_BUFFER_SIZE),
+ buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
async_shaders(emu_window_) {
- CheckExtensions();
-
unified_uniform_buffer.Create();
glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
@@ -174,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
nullptr, 0);
}
}
-
if (device.UseAsynchronousShaders()) {
async_shaders.AllocateWorkers();
}
@@ -186,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() {
}
}
-void RasterizerOpenGL::CheckExtensions() {
- if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
- LOG_WARNING(
- Render_OpenGL,
- "Anisotropic filter is not supported! This can cause graphical issues in some games.");
- }
-}
-
void RasterizerOpenGL::SetupVertexFormat() {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexFormats]) {
@@ -316,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
return info.offset;
}
-void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
+void RasterizerOpenGL::SetupShaders() {
MICROPROFILE_SCOPE(OpenGL_Shader);
u32 clip_distances = 0;
+ std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
+ image_view_indices.clear();
+ sampler_handles.clear();
+
+ texture_cache.SynchronizeGraphicsDescriptors();
+
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = maxwell3d.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -338,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
continue;
}
-
// Currently this stages are not supported in the OpenGL backend.
// TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
if (program == Maxwell::ShaderProgram::TesselationControl ||
@@ -347,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
-
const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
switch (program) {
case Maxwell::ShaderProgram::VertexA:
@@ -363,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
default:
UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
shader_config.enable.Value(), shader_config.offset);
+ break;
}
// Stage indices are 0 - 5
- const std::size_t stage = index == 0 ? 0 : index - 1;
+ const size_t stage = index == 0 ? 0 : index - 1;
+ shaders[stage] = shader;
+
SetupDrawConstBuffers(stage, shader);
SetupDrawGlobalMemory(stage, shader);
- SetupDrawTextures(stage, shader);
- SetupDrawImages(stage, shader);
+ SetupDrawTextures(shader, stage);
+ SetupDrawImages(shader, stage);
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
@@ -384,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
++index;
}
}
-
SyncClipEnabled(clip_distances);
maxwell3d.dirty.flags[Dirty::Shaders] = false;
+
+ const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+
+ size_t image_view_index = 0;
+ size_t texture_index = 0;
+ size_t image_index = 0;
+ for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
+ const Shader* const shader = shaders[stage];
+ if (shader) {
+ const auto base = device.GetBaseBindings(stage);
+ BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
+ texture_index, image_index);
+ }
+ }
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -417,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
shader_cache.LoadDiskCache(title_id, stop_loading, callback);
}
-void RasterizerOpenGL::ConfigureFramebuffers() {
- MICROPROFILE_SCOPE(OpenGL_Framebuffer);
- if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
- return;
- }
- maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
-
- texture_cache.GuardRenderTargets(true);
-
- View depth_surface = texture_cache.GetDepthBufferSurface(true);
-
- const auto& regs = maxwell3d.regs;
- UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
-
- // Bind the framebuffer surfaces
- FramebufferCacheKey key;
- const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
- for (std::size_t index = 0; index < colors_count; ++index) {
- View color_surface{texture_cache.GetColorBufferSurface(index, true)};
- if (!color_surface) {
- continue;
- }
- // Assume that a surface will be written to if it is used as a framebuffer, even
- // if the shader doesn't actually write to it.
- texture_cache.MarkColorBufferInUse(index);
-
- key.SetAttachment(index, regs.rt_control.GetMap(index));
- key.colors[index] = std::move(color_surface);
- }
-
- if (depth_surface) {
- // Assume that a surface will be written to if it is used as a framebuffer, even if
- // the shader doesn't actually write to it.
- texture_cache.MarkDepthBufferInUse();
- key.zeta = std::move(depth_surface);
- }
-
- texture_cache.GuardRenderTargets(false);
-
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
-}
-
-void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
- const auto& regs = maxwell3d.regs;
-
- texture_cache.GuardRenderTargets(true);
- View color_surface;
-
- if (using_color) {
- // Determine if we have to preserve the contents.
- // First we have to make sure all clear masks are enabled.
- bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G ||
- !regs.clear_buffers.B || !regs.clear_buffers.A;
- const std::size_t index = regs.clear_buffers.RT;
- if (regs.clear_flags.scissor) {
- // Then we have to confirm scissor testing clears the whole image.
- const auto& scissor = regs.scissor_test[0];
- preserve_contents |= scissor.min_x > 0;
- preserve_contents |= scissor.min_y > 0;
- preserve_contents |= scissor.max_x < regs.rt[index].width;
- preserve_contents |= scissor.max_y < regs.rt[index].height;
- }
-
- color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents);
- texture_cache.MarkColorBufferInUse(index);
- }
-
- View depth_surface;
- if (using_depth_stencil) {
- bool preserve_contents = false;
- if (regs.clear_flags.scissor) {
- // For depth stencil clears we only have to confirm scissor test covers the whole image.
- const auto& scissor = regs.scissor_test[0];
- preserve_contents |= scissor.min_x > 0;
- preserve_contents |= scissor.min_y > 0;
- preserve_contents |= scissor.max_x < regs.zeta_width;
- preserve_contents |= scissor.max_y < regs.zeta_height;
- }
-
- depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
- texture_cache.MarkDepthBufferInUse();
- }
- texture_cache.GuardRenderTargets(false);
-
- FramebufferCacheKey key;
- key.colors[0] = std::move(color_surface);
- key.zeta = std::move(depth_surface);
-
- state_tracker.NotifyFramebuffer();
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
-}
-
void RasterizerOpenGL::Clear() {
if (!maxwell3d.ShouldExecute()) {
return;
@@ -523,8 +491,9 @@ void RasterizerOpenGL::Clear() {
regs.clear_buffers.A) {
use_color = true;
- state_tracker.NotifyColorMask0();
- glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
+ const GLuint index = regs.clear_buffers.RT;
+ state_tracker.NotifyColorMask(index);
+ glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
// TODO(Rodrigo): Determine if clamping is used on clears
@@ -557,15 +526,17 @@ void RasterizerOpenGL::Clear() {
state_tracker.NotifyScissor0();
glDisablei(GL_SCISSOR_TEST, 0);
}
-
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
- ConfigureClearFramebuffer(use_color, use_depth || use_stencil);
+ {
+ auto lock = texture_cache.AcquireLock();
+ texture_cache.UpdateRenderTargets(true);
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
+ }
if (use_color) {
- glClearBufferfv(GL_COLOR, 0, regs.clear_color);
+ glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
}
-
if (use_depth && use_stencil) {
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
} else if (use_depth) {
@@ -622,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
// Prepare the vertex array.
- const bool invalidated = buffer_cache.Map(buffer_size);
-
- if (invalidated) {
- // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
- auto& dirty = maxwell3d.dirty.flags;
- dirty[Dirty::VertexBuffers] = true;
- for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
- dirty[index] = true;
- }
- }
+ buffer_cache.Map(buffer_size);
// Prepare vertex array format.
SetupVertexFormat();
@@ -655,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
// Setup shaders and their used resources.
- texture_cache.GuardSamplers(true);
- const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
- SetupShaders(primitive_mode);
- texture_cache.GuardSamplers(false);
-
- ConfigureFramebuffers();
+ auto lock = texture_cache.AcquireLock();
+ SetupShaders();
// Signal the buffer cache that we are not going to upload more things.
buffer_cache.Unmap();
-
+ texture_cache.UpdateRenderTargets(false);
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
program_manager.BindGraphicsPipeline();
- if (texture_cache.TextureBarrier()) {
- glTextureBarrier();
- }
-
+ const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
BeginTransformFeedback(primitive_mode);
const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
@@ -722,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
buffer_cache.Acquire();
current_cbuf = 0;
- auto kernel = shader_cache.GetComputeKernel(code_addr);
- program_manager.BindCompute(kernel->GetHandle());
+ Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
- SetupComputeTextures(kernel);
- SetupComputeImages(kernel);
+ auto lock = texture_cache.AcquireLock();
+ BindComputeTextures(kernel);
- const std::size_t buffer_size =
- Tegra::Engines::KeplerCompute::NumConstBuffers *
- (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
+ const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
+ (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
buffer_cache.Map(buffer_size);
SetupComputeConstBuffers(kernel);
@@ -739,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
buffer_cache.Unmap();
const auto& launch_desc = kepler_compute.launch_description;
- program_manager.BindCompute(kernel->GetHandle());
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
}
@@ -760,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
- texture_cache.FlushRegion(addr, size);
+ {
+ auto lock = texture_cache.AcquireLock();
+ texture_cache.DownloadMemory(addr, size);
+ }
buffer_cache.FlushRegion(addr, size);
query_cache.FlushRegion(addr, size);
}
@@ -769,7 +725,8 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
if (!Settings::IsGPULevelHigh()) {
return buffer_cache.MustFlushRegion(addr, size);
}
- return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
+ return texture_cache.IsRegionGpuModified(addr, size) ||
+ buffer_cache.MustFlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -777,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
- texture_cache.InvalidateRegion(addr, size);
+ {
+ auto lock = texture_cache.AcquireLock();
+ texture_cache.WriteMemory(addr, size);
+ }
shader_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
@@ -788,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
- texture_cache.OnCPUWrite(addr, size);
+ {
+ auto lock = texture_cache.AcquireLock();
+ texture_cache.WriteMemory(addr, size);
+ }
shader_cache.OnCPUWrite(addr, size);
buffer_cache.OnCPUWrite(addr, size);
}
void RasterizerOpenGL::SyncGuestHost() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- texture_cache.SyncGuestHost();
buffer_cache.SyncGuestHost();
shader_cache.SyncGuestHost();
}
+void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
+ {
+ auto lock = texture_cache.AcquireLock();
+ texture_cache.UnmapMemory(addr, size);
+ }
+ buffer_cache.OnCPUWrite(addr, size);
+ shader_cache.OnCPUWrite(addr, size);
+}
+
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
if (!gpu.IsAsync()) {
gpu_memory.Write<u32>(addr, value);
@@ -841,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() {
GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
}
+void RasterizerOpenGL::FragmentBarrier() {
+ glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
+}
+
+void RasterizerOpenGL::TiledCacheBarrier() {
+ glTextureBarrier();
+}
+
void RasterizerOpenGL::FlushCommands() {
// Only flush when we have commands queued to OpenGL.
if (num_queued_commands == 0) {
@@ -854,45 +833,95 @@ void RasterizerOpenGL::TickFrame() {
// Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
num_queued_commands = 0;
+ fence_manager.TickFrame();
buffer_cache.TickFrame();
+ {
+ auto lock = texture_cache.AcquireLock();
+ texture_cache.TickFrame();
+ }
}
-bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
- const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
- texture_cache.DoFermiCopy(src, dst, copy_config);
+ auto lock = texture_cache.AcquireLock();
+ texture_cache.BlitImage(dst, src, copy_config);
return true;
}
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
- if (!framebuffer_addr) {
- return {};
+ if (framebuffer_addr == 0) {
+ return false;
}
-
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
- if (!surface) {
- return {};
+ auto lock = texture_cache.AcquireLock();
+ ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
+ if (!image_view) {
+ return false;
}
-
// Verify that the cached surface is the same size and format as the requested framebuffer
- const auto& params{surface->GetSurfaceParams()};
- const auto& pixel_format{
- VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
- ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
- ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
+ // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
+ // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
- if (params.pixel_format != pixel_format) {
- LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different");
- }
+ screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
+ screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
+ return true;
+}
- screen_info.display_texture = surface->GetTexture();
- screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion;
+void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
+ image_view_indices.clear();
+ sampler_handles.clear();
- return true;
+ texture_cache.SynchronizeComputeDescriptors();
+
+ SetupComputeTextures(kernel);
+ SetupComputeImages(kernel);
+
+ const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ texture_cache.FillComputeImageViews(indices_span, image_view_ids);
+
+ program_manager.BindCompute(kernel->GetHandle());
+ size_t image_view_index = 0;
+ size_t texture_index = 0;
+ size_t image_index = 0;
+ BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
+}
+
+void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
+ GLuint base_image, size_t& image_view_index,
+ size_t& texture_index, size_t& image_index) {
+ const GLuint* const samplers = sampler_handles.data() + texture_index;
+ const GLuint* const textures = texture_handles.data() + texture_index;
+ const GLuint* const images = image_handles.data() + image_index;
+
+ const size_t num_samplers = entries.samplers.size();
+ for (const auto& sampler : entries.samplers) {
+ for (size_t i = 0; i < sampler.size; ++i) {
+ const ImageViewId image_view_id = image_view_ids[image_view_index++];
+ const ImageView& image_view = texture_cache.GetImageView(image_view_id);
+ const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
+ texture_handles[texture_index++] = handle;
+ }
+ }
+ const size_t num_images = entries.images.size();
+ for (size_t unit = 0; unit < num_images; ++unit) {
+ // TODO: Mark as modified
+ const ImageViewId image_view_id = image_view_ids[image_view_index++];
+ const ImageView& image_view = texture_cache.GetImageView(image_view_id);
+ const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
+ image_handles[image_index] = handle;
+ ++image_index;
+ }
+ if (num_samplers > 0) {
+ glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
+ glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
+ }
+ if (num_images > 0) {
+ glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
+ }
}
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
@@ -999,7 +1028,6 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
-
const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
const auto& entries{shader->GetEntries().global_memory_entries};
@@ -1056,77 +1084,53 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
}
}
-void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
- MICROPROFILE_SCOPE(OpenGL_Texture);
- u32 binding = device.GetBaseBindings(stage_index).sampler;
+void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
+ const bool via_header_index =
+ maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : shader->GetEntries().samplers) {
const auto shader_type = static_cast<ShaderType>(stage_index);
- for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
- SetupTexture(binding++, texture, entry);
+ for (size_t index = 0; index < entry.size; ++index) {
+ const auto handle =
+ GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
+ const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
+ sampler_handles.push_back(sampler->Handle());
+ image_view_indices.push_back(handle.image);
}
}
}
-void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
- MICROPROFILE_SCOPE(OpenGL_Texture);
- u32 binding = 0;
+void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
+ const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : kernel->GetEntries().samplers) {
- for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i);
- SetupTexture(binding++, texture, entry);
+ for (size_t i = 0; i < entry.size; ++i) {
+ const auto handle =
+ GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
+ const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
+ sampler_handles.push_back(sampler->Handle());
+ image_view_indices.push_back(handle.image);
}
}
}
-void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
- const SamplerEntry& entry) {
- const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
- if (!view) {
- // Can occur when texture addr is null or its memory is unmapped/invalid
- glBindSampler(binding, 0);
- glBindTextureUnit(binding, 0);
- return;
- }
- const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
- texture.tic.z_source, texture.tic.w_source);
- glBindTextureUnit(binding, handle);
- if (!view->GetSurfaceParams().IsBuffer()) {
- glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
- }
-}
-
-void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
- u32 binding = device.GetBaseBindings(stage_index).image;
+void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
+ const bool via_header_index =
+ maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : shader->GetEntries().images) {
const auto shader_type = static_cast<ShaderType>(stage_index);
- const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
- SetupImage(binding++, tic, entry);
+ const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
+ image_view_indices.push_back(handle.image);
}
}
-void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
- u32 binding = 0;
+void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
+ const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : shader->GetEntries().images) {
- const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic;
- SetupImage(binding++, tic, entry);
+ const auto handle =
+ GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
+ image_view_indices.push_back(handle.image);
}
}
-void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
- const ImageEntry& entry) {
- const auto view = texture_cache.GetImageSurface(tic, entry);
- if (!view) {
- glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
- return;
- }
- if (entry.is_written) {
- view->MarkAsModified(texture_cache.Tick());
- }
- const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
- glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
-}
-
void RasterizerOpenGL::SyncViewport() {
auto& flags = maxwell3d.dirty.flags;
const auto& regs = maxwell3d.regs;
@@ -1526,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() {
flags[Dirty::PointSize] = false;
oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
+ oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable);
- if (maxwell3d.regs.vp_point_size.enable) {
- // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
- glEnable(GL_PROGRAM_POINT_SIZE);
- return;
- }
-
- // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
- // in OpenGL).
glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
- glDisable(GL_PROGRAM_POINT_SIZE);
}
void RasterizerOpenGL::SyncLineState() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index de28cff15..82e03e677 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -7,12 +7,13 @@
#include <array>
#include <atomic>
#include <cstddef>
-#include <map>
#include <memory>
#include <optional>
#include <tuple>
#include <utility>
+#include <boost/container/static_vector.hpp>
+
#include <glad/glad.h>
#include "common/common_types.h"
@@ -23,16 +24,14 @@
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
-#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_sampler_cache.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
+#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
@@ -51,7 +50,7 @@ class MemoryManager;
namespace OpenGL {
struct ScreenInfo;
-struct DrawParameters;
+struct ShaderEntries;
struct BindlessSSBO {
GLuint64EXT address;
@@ -79,15 +78,18 @@ public:
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
+ void UnmapMemory(VAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
+ void FragmentBarrier() override;
+ void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
- bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
- const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+ bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
@@ -108,11 +110,14 @@ public:
}
private:
- /// Configures the color and depth framebuffer states.
- void ConfigureFramebuffers();
+ static constexpr size_t MAX_TEXTURES = 192;
+ static constexpr size_t MAX_IMAGES = 48;
+ static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
+
+ void BindComputeTextures(Shader* kernel);
- /// Configures the color and depth framebuffer for clearing.
- void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
+ void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
+ size_t& image_view_index, size_t& texture_index, size_t& image_index);
/// Configures the current constbuffers to use for the draw command.
void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
@@ -136,23 +141,16 @@ private:
size_t size, BindlessSSBO* ssbo);
/// Configures the current textures to use for the draw command.
- void SetupDrawTextures(std::size_t stage_index, Shader* shader);
+ void SetupDrawTextures(const Shader* shader, size_t stage_index);
/// Configures the textures used in a compute shader.
- void SetupComputeTextures(Shader* kernel);
-
- /// Configures a texture.
- void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
- const SamplerEntry& entry);
+ void SetupComputeTextures(const Shader* kernel);
/// Configures images in a graphics shader.
- void SetupDrawImages(std::size_t stage_index, Shader* shader);
+ void SetupDrawImages(const Shader* shader, size_t stage_index);
/// Configures images in a compute shader.
- void SetupComputeImages(Shader* shader);
-
- /// Configures an image.
- void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
+ void SetupComputeImages(const Shader* shader);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport();
@@ -227,9 +225,6 @@ private:
/// End a transform feedback
void EndTransformFeedback();
- /// Check for extension that are not strictly required but are needed for correct emulation
- void CheckExtensions();
-
std::size_t CalculateVertexArraysSize() const;
std::size_t CalculateIndexBufferSize() const;
@@ -242,7 +237,7 @@ private:
GLintptr SetupIndexBuffer();
- void SetupShaders(GLenum primitive_mode);
+ void SetupShaders();
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
@@ -254,19 +249,21 @@ private:
ProgramManager& program_manager;
StateTracker& state_tracker;
- TextureCacheOpenGL texture_cache;
+ OGLStreamBuffer stream_buffer;
+ TextureCacheRuntime texture_cache_runtime;
+ TextureCache texture_cache;
ShaderCacheOpenGL shader_cache;
- SamplerCacheOpenGL sampler_cache;
- FramebufferCacheOpenGL framebuffer_cache;
QueryCache query_cache;
OGLBufferCache buffer_cache;
FenceManagerOpenGL fence_manager;
VideoCommon::Shader::AsyncShaders async_shaders;
- static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
-
- GLint vertex_binding = 0;
+ boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
+ std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
+ boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
+ std::array<GLuint, MAX_TEXTURES> texture_handles;
+ std::array<GLuint, MAX_IMAGES> image_handles;
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
transform_feedback_buffers;
@@ -280,7 +277,7 @@ private:
std::size_t current_cbuf = 0;
OGLBuffer unified_uniform_buffer;
- /// Number of commands queued to the OpenGL driver. Reseted on flush.
+ /// Number of commands queued to the OpenGL driver. Reset on flush.
std::size_t num_queued_commands = 0;
u32 last_clip_distance_mask = 0;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 0ebcec427..0e34a0f20 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -71,7 +71,7 @@ void OGLSampler::Create() {
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
- glGenSamplers(1, &handle);
+ glCreateSamplers(1, &handle);
}
void OGLSampler::Release() {
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
deleted file mode 100644
index 5c174879a..000000000
--- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/logging/log.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_sampler_cache.h"
-#include "video_core/renderer_opengl/maxwell_to_gl.h"
-
-namespace OpenGL {
-
-SamplerCacheOpenGL::SamplerCacheOpenGL() = default;
-
-SamplerCacheOpenGL::~SamplerCacheOpenGL() = default;
-
-OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
- OGLSampler sampler;
- sampler.Create();
-
- const GLuint sampler_id{sampler.handle};
- glSamplerParameteri(
- sampler_id, GL_TEXTURE_MAG_FILTER,
- MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None));
- glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
- MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter));
- glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u));
- glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v));
- glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p));
- glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
- tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
- glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
- MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func));
- glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data());
- glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod());
- glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod());
- glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias());
- if (GLAD_GL_ARB_texture_filter_anisotropic) {
- glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
- } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
- glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
- } else {
- LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
- }
-
- return sampler;
-}
-
-GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const {
- return sampler.handle;
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h
deleted file mode 100644
index 34ee37f00..000000000
--- a/src/video_core/renderer_opengl/gl_sampler_cache.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <glad/glad.h>
-
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/sampler_cache.h"
-
-namespace OpenGL {
-
-class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> {
-public:
- explicit SamplerCacheOpenGL();
- ~SamplerCacheOpenGL();
-
-protected:
- OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
-
- GLuint ToSamplerType(const OGLSampler& sampler) const override;
-};
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index eabfdea5d..d4841fdb7 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -27,7 +27,6 @@
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
-#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index ccbdfe967..2e1fa252d 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -38,11 +38,9 @@ using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
using Tegra::Shader::TextureType;
-using VideoCommon::Shader::BuildTransformFeedback;
-using VideoCommon::Shader::Registry;
-using namespace std::string_literals;
using namespace VideoCommon::Shader;
+using namespace std::string_literals;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using Operation = const OperationNode&;
@@ -2753,11 +2751,11 @@ private:
}
}
- std::string GetSampler(const Sampler& sampler) const {
+ std::string GetSampler(const SamplerEntry& sampler) const {
return AppendSuffix(sampler.index, "sampler");
}
- std::string GetImage(const Image& image) const {
+ std::string GetImage(const ImageEntry& image) const {
return AppendSuffix(image.index, "image");
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index c4ff47875..be68994bb 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -20,8 +20,8 @@ namespace OpenGL {
class Device;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using SamplerEntry = VideoCommon::Shader::Sampler;
-using ImageEntry = VideoCommon::Shader::Image;
+using SamplerEntry = VideoCommon::Shader::SamplerEntry;
+using ImageEntry = VideoCommon::Shader::ImageEntry;
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 691c6c79b..553e6e8d6 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() {
}
}
+void ProgramManager::BindHostCompute(GLuint program) {
+ if (use_assembly_programs) {
+ glDisable(GL_COMPUTE_PROGRAM_NV);
+ }
+ glUseProgram(program);
+ is_graphics_bound = false;
+}
+
+void ProgramManager::RestoreGuestCompute() {
+ if (use_assembly_programs) {
+ glEnable(GL_COMPUTE_PROGRAM_NV);
+ glUseProgram(0);
+ }
+}
+
void ProgramManager::UseVertexShader(GLuint program) {
if (use_assembly_programs) {
BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 950e0dfcb..ad42cce74 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,6 +45,12 @@ public:
/// Rewinds BindHostPipeline state changes.
void RestoreGuestPipeline();
+ /// Binds an OpenGL GLSL program object unsynchronized with the guest state.
+ void BindHostCompute(GLuint program);
+
+ /// Rewinds BindHostCompute state changes.
+ void RestoreGuestCompute();
+
void UseVertexShader(GLuint program);
void UseGeometryShader(GLuint program);
void UseFragmentShader(GLuint program);
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index 45f4fc565..60e6fa39f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -249,4 +249,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
}
}
+void StateTracker::InvalidateStreamBuffer() {
+ flags[Dirty::VertexBuffers] = true;
+ for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
+ flags[index] = true;
+ }
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 9d127548f..574615d3c 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -92,6 +92,8 @@ class StateTracker {
public:
explicit StateTracker(Tegra::GPU& gpu);
+ void InvalidateStreamBuffer();
+
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
return;
@@ -100,6 +102,14 @@ public:
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
}
+ void BindFramebuffer(GLuint new_framebuffer) {
+ if (framebuffer == new_framebuffer) {
+ return;
+ }
+ framebuffer = new_framebuffer;
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
+ }
+
void NotifyScreenDrawVertexArray() {
flags[OpenGL::Dirty::VertexFormats] = true;
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
@@ -129,9 +139,9 @@ public:
flags[OpenGL::Dirty::Scissor0] = true;
}
- void NotifyColorMask0() {
+ void NotifyColorMask(size_t index) {
flags[OpenGL::Dirty::ColorMasks] = true;
- flags[OpenGL::Dirty::ColorMask0] = true;
+ flags[OpenGL::Dirty::ColorMask0 + index] = true;
}
void NotifyBlend0() {
@@ -190,6 +200,7 @@ public:
private:
Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
+ GLuint framebuffer = 0;
GLuint index_buffer = 0;
};
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 887995cf4..e0819cdf2 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -9,6 +9,7 @@
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
@@ -16,24 +17,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
namespace OpenGL {
-OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
- : buffer_size(size) {
+OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
+ : state_tracker{state_tracker_} {
gl_buffer.Create();
- GLsizeiptr allocate_size = size;
- if (vertex_data_usage) {
- // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
- // read position is near the end and is an out-of-bound access to the vertex buffer. This is
- // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
- // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
- // crash.
- allocate_size *= 2;
- }
-
static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
- glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
+ glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
mapped_ptr = static_cast<u8*>(
- glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
+ glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
@@ -46,25 +37,24 @@ OGLStreamBuffer::~OGLStreamBuffer() {
gl_buffer.Release();
}
-std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
- ASSERT(size <= buffer_size);
- ASSERT(alignment <= buffer_size);
+std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
+ ASSERT(size <= BUFFER_SIZE);
+ ASSERT(alignment <= BUFFER_SIZE);
mapped_size = size;
if (alignment > 0) {
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
}
- bool invalidate = false;
- if (buffer_pos + size > buffer_size) {
+ if (buffer_pos + size > BUFFER_SIZE) {
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
glInvalidateBufferData(gl_buffer.handle);
+ state_tracker.InvalidateStreamBuffer();
buffer_pos = 0;
- invalidate = true;
}
- return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
+ return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
}
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index 307a67113..dd9cf67eb 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -4,29 +4,31 @@
#pragma once
-#include <tuple>
+#include <utility>
+
#include <glad/glad.h>
+
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class Device;
+class StateTracker;
class OGLStreamBuffer : private NonCopyable {
public:
- explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
+ explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
~OGLStreamBuffer();
/*
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
* and the optional alignment requirement.
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
- * The return values are the pointer to the new chunk, the offset within the buffer,
- * and the invalidation flag for previous chunks.
+ * The return values are the pointer to the new chunk, and the offset within the buffer.
* The actual used size must be specified on unmapping the chunk.
*/
- std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
+ std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
void Unmap(GLsizeiptr size);
@@ -39,15 +41,18 @@ public:
}
GLsizeiptr Size() const noexcept {
- return buffer_size;
+ return BUFFER_SIZE;
}
private:
+ static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
+
+ StateTracker& state_tracker;
+
OGLBuffer gl_buffer;
GLuint64EXT gpu_address = 0;
GLintptr buffer_pos = 0;
- GLsizeiptr buffer_size = 0;
GLsizeiptr mapped_size = 0;
u8* mapped_ptr = nullptr;
};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index daf352b50..4c690418c 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -2,173 +2,238 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include "common/assert.h"
-#include "common/bit_util.h"
-#include "common/common_types.h"
-#include "common/microprofile.h"
-#include "common/scope_exit.h"
-#include "core/core.h"
-#include "video_core/morton.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include <algorithm>
+#include <array>
+#include <bit>
+#include <string>
+
+#include <glad/glad.h>
+
+#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/renderer_opengl/utils.h"
-#include "video_core/texture_cache/surface_base.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
+#include "video_core/renderer_opengl/util_shaders.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/format_lookup_table.h"
+#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/texture_cache.h"
-#include "video_core/textures/convert.h"
-#include "video_core/textures/texture.h"
+#include "video_core/textures/decoders.h"
namespace OpenGL {
-using Tegra::Texture::SwizzleSource;
-using VideoCore::MortonSwizzleMode;
+namespace {
+using Tegra::Texture::SwizzleSource;
+using Tegra::Texture::TextureMipmapFilter;
+using Tegra::Texture::TextureType;
+using Tegra::Texture::TICEntry;
+using Tegra::Texture::TSCEntry;
+using VideoCommon::CalculateLevelStrideAlignment;
+using VideoCommon::ImageCopy;
+using VideoCommon::ImageFlagBits;
+using VideoCommon::ImageType;
+using VideoCommon::NUM_RT;
+using VideoCommon::SamplesLog2;
+using VideoCommon::SwizzleParameters;
+using VideoCore::Surface::BytesPerBlock;
+using VideoCore::Surface::IsPixelFormatASTC;
+using VideoCore::Surface::IsPixelFormatSRGB;
+using VideoCore::Surface::MaxPixelFormat;
using VideoCore::Surface::PixelFormat;
-using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
-MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
-MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
-MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
- MP_RGB(128, 192, 128));
+struct CopyOrigin {
+ GLint level;
+ GLint x;
+ GLint y;
+ GLint z;
+};
-namespace {
+struct CopyRegion {
+ GLsizei width;
+ GLsizei height;
+ GLsizei depth;
+};
struct FormatTuple {
GLenum internal_format;
GLenum format = GL_NONE;
GLenum type = GL_NONE;
+ GLenum store_format = internal_format;
};
-constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
- {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
- {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
- {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
- {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
- {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
- {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
- {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
- {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
- {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
- {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
- {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
- {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
- {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
- {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
- {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
- {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
- {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
- {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
- {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
- {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
- {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
- {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
- {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
- {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
- {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
- {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
- {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
- {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
- {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
- {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
- {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
- {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
- {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
- {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
- {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
- {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
- {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
- {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
- {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
- {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
- {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
- {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
- {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
- {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
- {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
- {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
- {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
- {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
- {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
- {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
- {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
- {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
- {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
- {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
- {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
- {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
- {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
- {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
- {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
- {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
- // Compressed sRGB formats
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
- {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
- {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
- {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
- {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
- {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
-
- // Depth formats
- {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
- {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
-
- // DepthStencil formats
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
+constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
+ {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
+ {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
+ {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
+ {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
+ {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
+ {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
+ {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
+ {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
+ {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
+ {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
+ {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
+ {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
+ {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
+ {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
+ {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
+ {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
+ {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
+ {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
+ {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
+ {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
+ {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
+ {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
+ {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
+ {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
+ {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
+ {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
+ {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
+ {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
+ {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
+ {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
+ {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
+ {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
+ {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
+ {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
+ {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
+ {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
+ {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
+ {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
+ {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
+ {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
+ {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
+ {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
+ {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
+ {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
+ {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
+ {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_RGBA8}, // A8B8G8R8_SRGB
+ {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
+ {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
+ {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
+ {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
+ {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
+ {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
+ {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
+ {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
+ {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
+ {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
+ {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
+ {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, GL_RGBA8}, // B8G8R8A8_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
+ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
+ {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
+ {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
+ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
}};
+constexpr std::array ACCELERATED_FORMATS{
+ GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F,
+ GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI,
+ GL_RG16UI, GL_RG8UI, GL_R32UI, GL_R16UI, GL_R8UI, GL_RGBA32I,
+ GL_RGBA16I, GL_RGBA8I, GL_RG32I, GL_RG16I, GL_RG8I, GL_R32I,
+ GL_R16I, GL_R8I, GL_RGBA16, GL_RGB10_A2, GL_RGBA8, GL_RG16,
+ GL_RG8, GL_R16, GL_R8, GL_RGBA16_SNORM, GL_RGBA8_SNORM, GL_RG16_SNORM,
+ GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
+};
+
const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
- ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
- return tex_format_tuples[static_cast<std::size_t>(pixel_format)];
+ ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
+ return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
}
-GLenum GetTextureTarget(const SurfaceTarget& target) {
- switch (target) {
- case SurfaceTarget::TextureBuffer:
+GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
+ switch (info.type) {
+ case ImageType::e1D:
+ return GL_TEXTURE_1D_ARRAY;
+ case ImageType::e2D:
+ if (info.num_samples > 1) {
+ return GL_TEXTURE_2D_MULTISAMPLE_ARRAY;
+ }
+ return GL_TEXTURE_2D_ARRAY;
+ case ImageType::e3D:
+ return GL_TEXTURE_3D;
+ case ImageType::Linear:
+ return GL_TEXTURE_2D_ARRAY;
+ case ImageType::Buffer:
return GL_TEXTURE_BUFFER;
- case SurfaceTarget::Texture1D:
+ }
+ UNREACHABLE_MSG("Invalid image type={}", info.type);
+ return GL_NONE;
+}
+
+GLenum ImageTarget(ImageViewType type, int num_samples = 1) {
+ const bool is_multisampled = num_samples > 1;
+ switch (type) {
+ case ImageViewType::e1D:
return GL_TEXTURE_1D;
- case SurfaceTarget::Texture2D:
- return GL_TEXTURE_2D;
- case SurfaceTarget::Texture3D:
+ case ImageViewType::e2D:
+ return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
+ case ImageViewType::Cube:
+ return GL_TEXTURE_CUBE_MAP;
+ case ImageViewType::e3D:
return GL_TEXTURE_3D;
- case SurfaceTarget::Texture1DArray:
+ case ImageViewType::e1DArray:
return GL_TEXTURE_1D_ARRAY;
- case SurfaceTarget::Texture2DArray:
- return GL_TEXTURE_2D_ARRAY;
- case SurfaceTarget::TextureCubemap:
- return GL_TEXTURE_CUBE_MAP;
- case SurfaceTarget::TextureCubeArray:
+ case ImageViewType::e2DArray:
+ return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
+ case ImageViewType::CubeArray:
return GL_TEXTURE_CUBE_MAP_ARRAY;
+ case ImageViewType::Rect:
+ return GL_TEXTURE_RECTANGLE;
+ case ImageViewType::Buffer:
+ return GL_TEXTURE_BUFFER;
}
- UNREACHABLE();
- return {};
+ UNREACHABLE_MSG("Invalid image view type={}", type);
+ return GL_NONE;
}
-GLint GetSwizzleSource(SwizzleSource source) {
+GLenum TextureMode(PixelFormat format, bool is_first) {
+ switch (format) {
+ case PixelFormat::D24_UNORM_S8_UINT:
+ case PixelFormat::D32_FLOAT_S8_UINT:
+ return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
+ case PixelFormat::S8_UINT_D24_UNORM:
+ return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
+ default:
+ UNREACHABLE();
+ return GL_DEPTH_COMPONENT;
+ }
+}
+
+GLint Swizzle(SwizzleSource source) {
switch (source) {
case SwizzleSource::Zero:
return GL_ZERO;
@@ -184,530 +249,813 @@ GLint GetSwizzleSource(SwizzleSource source) {
case SwizzleSource::OneFloat:
return GL_ONE;
}
- UNREACHABLE();
+ UNREACHABLE_MSG("Invalid swizzle source={}", source);
return GL_NONE;
}
-GLenum GetComponent(PixelFormat format, bool is_first) {
- switch (format) {
- case PixelFormat::D24_UNORM_S8_UINT:
- case PixelFormat::D32_FLOAT_S8_UINT:
- return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
- case PixelFormat::S8_UINT_D24_UNORM:
- return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
+GLenum AttachmentType(PixelFormat format) {
+ switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) {
+ case SurfaceType::Depth:
+ return GL_DEPTH_ATTACHMENT;
+ case SurfaceType::DepthStencil:
+ return GL_DEPTH_STENCIL_ATTACHMENT;
default:
- UNREACHABLE();
- return GL_DEPTH_COMPONENT;
+ UNIMPLEMENTED_MSG("Unimplemented type={}", type);
+ return GL_NONE;
}
}
-void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
- if (params.IsBuffer()) {
- return;
+[[nodiscard]] bool IsConverted(const Device& device, PixelFormat format, ImageType type) {
+ if (!device.HasASTC() && IsPixelFormatASTC(format)) {
+ return true;
}
- glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
- glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
- glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
- glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1));
- if (params.num_levels == 1) {
- glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f);
+ switch (format) {
+ case PixelFormat::BC4_UNORM:
+ case PixelFormat::BC5_UNORM:
+ return type == ImageType::e3D;
+ default:
+ break;
}
+ return false;
}
-OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format,
- OGLBuffer& texture_buffer) {
- OGLTexture texture;
- texture.Create(target);
+[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) {
+ switch (value) {
+ case SwizzleSource::G:
+ return SwizzleSource::R;
+ default:
+ return value;
+ }
+}
- switch (params.target) {
- case SurfaceTarget::Texture1D:
- glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width);
- break;
- case SurfaceTarget::TextureBuffer:
- texture_buffer.Create();
- glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(),
- nullptr, GL_DYNAMIC_STORAGE_BIT);
- glTextureBuffer(texture.handle, internal_format, texture_buffer.handle);
+void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4> swizzle) {
+ switch (format) {
+ case PixelFormat::D24_UNORM_S8_UINT:
+ case PixelFormat::D32_FLOAT_S8_UINT:
+ case PixelFormat::S8_UINT_D24_UNORM:
+ UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G);
+ glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
+ TextureMode(format, swizzle[0] == SwizzleSource::R));
+ std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
break;
- case SurfaceTarget::Texture2D:
- case SurfaceTarget::TextureCubemap:
- glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width,
- params.height);
+ default:
break;
- case SurfaceTarget::Texture3D:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubeArray:
- glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width,
- params.height, params.depth);
+ }
+ std::array<GLint, 4> gl_swizzle;
+ std::ranges::transform(swizzle, gl_swizzle.begin(), Swizzle);
+ glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
+}
+
+[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime,
+ const VideoCommon::ImageInfo& info) {
+ // Disable accelerated uploads for now as they don't implement swizzled uploads
+ return false;
+ switch (info.type) {
+ case ImageType::e2D:
+ case ImageType::e3D:
+ case ImageType::Linear:
break;
default:
- UNREACHABLE();
+ return false;
+ }
+ const GLenum internal_format = GetFormatTuple(info.format).internal_format;
+ const auto& format_info = runtime.FormatInfo(info.type, internal_format);
+ if (format_info.is_compressed) {
+ return false;
+ }
+ if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) {
+ return false;
}
+ if (format_info.compatibility_by_size) {
+ return true;
+ }
+ const GLenum store_format = StoreFormat(BytesPerBlock(info.format));
+ const GLenum store_class = runtime.FormatInfo(info.type, store_format).compatibility_class;
+ return format_info.compatibility_class == store_class;
+}
- ApplyTextureDefaults(params, texture.handle);
+[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset,
+ VideoCommon::SubresourceLayers subresource, GLenum target) {
+ switch (target) {
+ case GL_TEXTURE_2D_ARRAY:
+ case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+ return CopyOrigin{
+ .level = static_cast<GLint>(subresource.base_level),
+ .x = static_cast<GLint>(offset.x),
+ .y = static_cast<GLint>(offset.y),
+ .z = static_cast<GLint>(subresource.base_layer),
+ };
+ case GL_TEXTURE_3D:
+ return CopyOrigin{
+ .level = static_cast<GLint>(subresource.base_level),
+ .x = static_cast<GLint>(offset.x),
+ .y = static_cast<GLint>(offset.y),
+ .z = static_cast<GLint>(offset.z),
+ };
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented copy target={}", target);
+ return CopyOrigin{.level = 0, .x = 0, .y = 0, .z = 0};
+ }
+}
- return texture;
+[[nodiscard]] CopyRegion MakeCopyRegion(VideoCommon::Extent3D extent,
+ VideoCommon::SubresourceLayers dst_subresource,
+ GLenum target) {
+ switch (target) {
+ case GL_TEXTURE_2D_ARRAY:
+ case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+ return CopyRegion{
+ .width = static_cast<GLsizei>(extent.width),
+ .height = static_cast<GLsizei>(extent.height),
+ .depth = static_cast<GLsizei>(dst_subresource.num_layers),
+ };
+ case GL_TEXTURE_3D:
+ return CopyRegion{
+ .width = static_cast<GLsizei>(extent.width),
+ .height = static_cast<GLsizei>(extent.height),
+ .depth = static_cast<GLsizei>(extent.depth),
+ };
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented copy target={}", target);
+ return CopyRegion{.width = 0, .height = 0, .depth = 0};
+ }
}
-constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source,
- SwizzleSource w_source) {
- return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
- (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
+void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
+ if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
+ const GLuint texture = image_view->DefaultHandle();
+ glNamedFramebufferTexture(fbo, attachment, texture, 0);
+ return;
+ }
+ const GLuint texture = image_view->Handle(ImageViewType::e3D);
+ if (image_view->range.extent.layers > 1) {
+ // TODO: OpenGL doesn't support rendering to a fixed number of slices
+ glNamedFramebufferTexture(fbo, attachment, texture, 0);
+ } else {
+ const u32 slice = image_view->range.base.layer;
+ glNamedFramebufferTextureLayer(fbo, attachment, texture, 0, slice);
+ }
}
} // Anonymous namespace
-CachedSurface::CachedSurface(const GPUVAddr gpu_addr_, const SurfaceParams& params_,
- bool is_astc_supported_)
- : SurfaceBase<View>{gpu_addr_, params_, is_astc_supported_} {
- if (is_converted) {
- internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8;
- format = GL_RGBA;
- type = GL_UNSIGNED_BYTE;
- } else {
- const auto& tuple{GetFormatTuple(params.pixel_format)};
- internal_format = tuple.internal_format;
- format = tuple.format;
- type = tuple.type;
- is_compressed = params.IsCompressed();
- }
- target = GetTextureTarget(params.target);
- texture = CreateTexture(params, target, internal_format, texture_buffer);
- DecorateSurfaceName();
+ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_)
+ : span(map, size), sync{sync_}, handle{handle_} {}
- u32 num_layers = 1;
- if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
- num_layers = params.depth;
+ImageBufferMap::~ImageBufferMap() {
+ if (sync) {
+ sync->Create();
}
-
- main_view =
- CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
}
-CachedSurface::~CachedSurface() = default;
+/// Builds the OpenGL texture cache runtime.
+///
+/// Two things happen here:
+///  1. For each target in TARGETS, the driver is queried for the compatibility class,
+///     compatibility type and compressed flag of every internal format in FORMAT_TABLE; the
+///     results are cached in format_properties.
+///  2. A set of 1x1 GL_R8 "null" textures and texture views is created, given an all-zero
+///     swizzle, and registered in null_image_views, one handle per ImageViewType.
+TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
+ StateTracker& state_tracker_)
+ : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) {
+ // format_properties[i] corresponds to TARGETS[i]; FormatInfo() relies on this ordering.
+ static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
+ for (size_t i = 0; i < TARGETS.size(); ++i) {
+ const GLenum target = TARGETS[i];
+ for (const FormatTuple& tuple : FORMAT_TABLE) {
+ const GLenum format = tuple.internal_format;
+ // NOTE(review): these are left uninitialized; they only hold meaningful values if the
+ // queries below succeed.
+ GLint compat_class;
+ GLint compat_type;
+ GLint is_compressed;
+ glGetInternalformativ(target, format, GL_IMAGE_COMPATIBILITY_CLASS, 1, &compat_class);
+ glGetInternalformativ(target, format, GL_IMAGE_FORMAT_COMPATIBILITY_TYPE, 1,
+ &compat_type);
+ glGetInternalformativ(target, format, GL_TEXTURE_COMPRESSED, 1, &is_compressed);
+ const FormatProperties properties{
+ .compatibility_class = static_cast<GLenum>(compat_class),
+ .compatibility_by_size = compat_type == GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE,
+ .is_compressed = is_compressed == GL_TRUE,
+ };
+ format_properties[i].emplace(format, properties);
+ }
+ }
+ // Backing storage for the null images: a single 1x1 R8 texel each (six layers for the cube
+ // array so that a cube view can be created from it).
+ null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
+ null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
+ null_image_3d.Create(GL_TEXTURE_3D);
+ null_image_rect.Create(GL_TEXTURE_RECTANGLE);
+ glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
+ glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
+ glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
+ glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
+
+ // Names for the views are generated with glGenTextures and then handed to glTextureView.
+ // NOTE(review): glTextureView expects a name that has not yet been given a target, which is
+ // presumably why glCreateTextures is not used here - confirm against the GL reference.
+ std::array<GLuint, 4> new_handles;
+ glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
+ null_image_view_1d.handle = new_handles[0];
+ null_image_view_2d.handle = new_handles[1];
+ null_image_view_2d_array.handle = new_handles[2];
+ null_image_view_cube.handle = new_handles[3];
+ // The 2D, 2D array and cube views all alias the cube array null image.
+ glTextureView(null_image_view_1d.handle, GL_TEXTURE_1D, null_image_1d_array.handle, GL_R8, 0, 1,
+ 0, 1);
+ glTextureView(null_image_view_2d.handle, GL_TEXTURE_2D, null_image_cube_array.handle, GL_R8, 0,
+ 1, 0, 1);
+ glTextureView(null_image_view_2d_array.handle, GL_TEXTURE_2D_ARRAY,
+ null_image_cube_array.handle, GL_R8, 0, 1, 0, 1);
+ glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
+ GL_R8, 0, 1, 0, 6);
+ const std::array texture_handles{
+ null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
+ null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle,
+ null_image_view_2d_array.handle, null_image_view_cube.handle,
+ };
+ // Force every channel of every null texture/view to sample as zero.
+ for (const GLuint handle : texture_handles) {
+ static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
+ glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
+ }
+ // Registers `handle` as the null view for `type`, labelling it when a debugging tool is
+ // attached.
+ const auto set_view = [this](ImageViewType type, GLuint handle) {
+ if (device.HasDebuggingToolAttached()) {
+ const std::string name = fmt::format("NullImage {}", type);
+ glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
+ }
+ null_image_views[static_cast<size_t>(type)] = handle;
+ };
+ // 1D, 2D, cube and 2D array use the views created above; the remaining types use the backing
+ // textures directly because their targets already match.
+ set_view(ImageViewType::e1D, null_image_view_1d.handle);
+ set_view(ImageViewType::e2D, null_image_view_2d.handle);
+ set_view(ImageViewType::Cube, null_image_view_cube.handle);
+ set_view(ImageViewType::e3D, null_image_3d.handle);
+ set_view(ImageViewType::e1DArray, null_image_1d_array.handle);
+ set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle);
+ set_view(ImageViewType::CubeArray, null_image_cube_array.handle);
+ set_view(ImageViewType::Rect, null_image_rect.handle);
+}
-void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
- MICROPROFILE_SCOPE(OpenGL_Texture_Download);
+/// Defaulted destructor: no explicit teardown beyond destroying the members.
+TextureCacheRuntime::~TextureCacheRuntime() = default;
- if (params.IsBuffer()) {
- glGetNamedBufferSubData(texture_buffer.handle, 0,
- static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)),
- staging_buffer.data());
- return;
- }
+/// Issues glFinish(), i.e. waits for previously submitted GL commands to complete.
+void TextureCacheRuntime::Finish() {
+ glFinish();
+}
- SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
+/// Returns a mapping of `size` bytes from the upload staging buffers. The request is made with
+/// insert_fence = true, so the returned map carries a sync object.
+ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
+ return upload_buffers.RequestMap(size, true);
+}
- for (u32 level = 0; level < params.emulated_levels; ++level) {
- glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
- glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
- const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
+/// Returns a mapping of `size` bytes from the download staging buffers. The request is made with
+/// insert_fence = false, so the returned map carries no sync object.
+ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
+ return download_buffers.RequestMap(size, false);
+}
- u8* const mip_data = staging_buffer.data() + mip_offset;
- const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
- if (is_compressed) {
- glGetCompressedTextureImage(texture.handle, level, size, mip_data);
- } else {
- glGetTextureImage(texture.handle, level, format, type, size, mip_data);
- }
+/// Copies every region listed in `copies` from `src_image` to `dst_image` using
+/// glCopyImageSubData, one call per ImageCopy entry.
+void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
+ std::span<const ImageCopy> copies) {
+ const GLuint dst_name = dst_image.Handle();
+ const GLuint src_name = src_image.Handle();
+ const GLenum dst_target = ImageTarget(dst_image.info);
+ const GLenum src_target = ImageTarget(src_image.info);
+ for (const ImageCopy& copy : copies) {
+ // Origins are computed per image target; the copied region is derived from the
+ // destination subresource and target.
+ const auto src_origin = MakeCopyOrigin(copy.src_offset, copy.src_subresource, src_target);
+ const auto dst_origin = MakeCopyOrigin(copy.dst_offset, copy.dst_subresource, dst_target);
+ const auto region = MakeCopyRegion(copy.extent, copy.dst_subresource, dst_target);
+ glCopyImageSubData(src_name, src_target, src_origin.level, src_origin.x, src_origin.y,
+ src_origin.z, dst_name, dst_target, dst_origin.level, dst_origin.x,
+ dst_origin.y, dst_origin.z, region.width, region.height, region.depth);
}
}
-void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
- MICROPROFILE_SCOPE(OpenGL_Texture_Upload);
- SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); });
- for (u32 level = 0; level < params.emulated_levels; ++level) {
- UploadTextureMipmap(level, staging_buffer);
+/// Returns false only when the destination is a 3D image in BC4_UNORM format; every other
+/// combination is reported as copyable. `src` is not inspected.
+bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) {
+ if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
+ return false;
}
+ return true;
}
-void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) {
- glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
- glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
-
- const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
- const u8* buffer{staging_buffer.data() + mip_offset};
- if (is_compressed) {
- const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
- switch (params.target) {
- case SurfaceTarget::Texture2D:
- glCompressedTextureSubImage2D(texture.handle, level, 0, 0,
- static_cast<GLsizei>(params.GetMipWidth(level)),
- static_cast<GLsizei>(params.GetMipHeight(level)),
- internal_format, image_size, buffer);
- break;
- case SurfaceTarget::Texture3D:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubeArray:
- glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0,
- static_cast<GLsizei>(params.GetMipWidth(level)),
- static_cast<GLsizei>(params.GetMipHeight(level)),
- static_cast<GLsizei>(params.GetMipDepth(level)),
- internal_format, image_size, buffer);
- break;
- case SurfaceTarget::TextureCubemap: {
- const std::size_t host_layer_size{params.GetHostLayerSize(level)};
- for (std::size_t face = 0; face < params.depth; ++face) {
- glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
- static_cast<GLsizei>(params.GetMipWidth(level)),
- static_cast<GLsizei>(params.GetMipHeight(level)), 1,
- internal_format,
- static_cast<GLsizei>(host_layer_size), buffer);
- buffer += host_layer_size;
- }
- break;
- }
- default:
- UNREACHABLE();
- }
+/// Performs a copy through util_shaders for the one case CanImageBeCopied() rejects: a 3D
+/// BC4_UNORM destination (the source is asserted to be 3D as well). Any other input is treated
+/// as unreachable.
+void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src,
+ std::span<const ImageCopy> copies) {
+ if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
+ ASSERT(src.info.type == ImageType::e3D);
+ util_shaders.CopyBC4(dst, src, copies);
} else {
- switch (params.target) {
- case SurfaceTarget::Texture1D:
- glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type,
- buffer);
- break;
- case SurfaceTarget::TextureBuffer:
- ASSERT(level == 0);
- glNamedBufferSubData(texture_buffer.handle, 0,
- params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer);
- break;
- case SurfaceTarget::Texture1DArray:
- case SurfaceTarget::Texture2D:
- glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level),
- params.GetMipHeight(level), format, type, buffer);
- break;
- case SurfaceTarget::Texture3D:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubeArray:
- glTextureSubImage3D(
- texture.handle, level, 0, 0, 0, static_cast<GLsizei>(params.GetMipWidth(level)),
- static_cast<GLsizei>(params.GetMipHeight(level)),
- static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer);
- break;
- case SurfaceTarget::TextureCubemap:
- for (std::size_t face = 0; face < params.depth; ++face) {
- glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
- params.GetMipWidth(level), params.GetMipHeight(level), 1,
- format, type, buffer);
- buffer += params.GetHostLayerSize(level);
- }
- break;
- default:
- UNREACHABLE();
- }
+ UNREACHABLE();
}
}
-void CachedSurface::DecorateSurfaceName() {
- LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName());
-}
+/// Blits `src_region` of `src` into `dst_region` of `dst` with glBlitNamedFramebuffer.
+///
+/// Both framebuffers must report the same BufferBits(). Linear filtering is used only when the
+/// blit is color-only and `filter` is Bilinear; otherwise nearest filtering is used.
+/// `operation` is not read by this function.
+void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
+ const std::array<Offset2D, 2>& dst_region,
+ const std::array<Offset2D, 2>& src_region,
+ Tegra::Engines::Fermi2D::Filter filter,
+ Tegra::Engines::Fermi2D::Operation operation) {
+ // The GL state for scissor 0, rasterizer discard and framebuffer sRGB is changed directly
+ // below, so the state tracker is notified about each of them first.
+ state_tracker.NotifyScissor0();
+ state_tracker.NotifyRasterizeEnable();
+ state_tracker.NotifyFramebufferSRGB();
-void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) {
- LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix);
+ ASSERT(dst->BufferBits() == src->BufferBits());
+
+ glEnable(GL_FRAMEBUFFER_SRGB);
+ glDisable(GL_RASTERIZER_DISCARD);
+ glDisablei(GL_SCISSOR_TEST, 0);
+
+ const GLbitfield buffer_bits = dst->BufferBits();
+ // Any bit other than the color bit means depth and/or stencil is being blitted.
+ const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0;
+ const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
+ glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y,
+ src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y,
+ dst_region[1].x, dst_region[1].y, buffer_bits,
+ is_linear ? GL_LINEAR : GL_NEAREST);
}
-View CachedSurface::CreateView(const ViewParams& view_key) {
- return CreateViewInner(view_key, false);
+/// Dispatches an upload to the matching util_shaders routine based on the image type:
+/// 2D -> BlockLinearUpload2D, 3D -> BlockLinearUpload3D, Linear -> PitchUpload.
+/// Any other image type is treated as unreachable.
+void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
+ size_t buffer_offset,
+ std::span<const SwizzleParameters> swizzles) {
+ switch (image.info.type) {
+ case ImageType::e2D:
+ return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles);
+ case ImageType::e3D:
+ return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles);
+ case ImageType::Linear:
+ return util_shaders.PitchUpload(image, map, buffer_offset, swizzles);
+ default:
+ UNREACHABLE();
+ break;
+ }
}
-View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) {
- auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy);
- views[view_key] = view;
- if (!is_proxy)
- view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++));
- return view;
+/// Issues a GL memory barrier covering texture fetches and shader image accesses.
+void TextureCacheRuntime::InsertUploadMemoryBarrier() {
+ glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
}
-CachedSurfaceView::CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_,
- bool is_proxy_)
- : ViewBase{params_}, surface{surface_}, format{surface_.internal_format},
- target{GetTextureTarget(params_.target)}, is_proxy{is_proxy_} {
- if (!is_proxy_) {
- main_view = CreateTextureView();
+/// Looks up the cached FormatProperties for `internal_format` in the table that matches `type`.
+///
+/// The table index follows the TARGETS order used by the constructor: 0 for 1D images,
+/// 1 for 2D and Linear images, 2 for 3D images. Other image types are treated as unreachable and
+/// yield a default-constructed FormatProperties.
+/// NOTE(review): the lookup uses .at(); with a standard map type this throws if the format was
+/// not queried at construction - confirm the container type of format_properties.
+FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal_format) const {
+ switch (type) {
+ case ImageType::e1D:
+ return format_properties[0].at(internal_format);
+ case ImageType::e2D:
+ case ImageType::Linear:
+ return format_properties[1].at(internal_format);
+ case ImageType::e3D:
+ return format_properties[2].at(internal_format);
+ default:
+ UNREACHABLE();
+ return FormatProperties{};
}
}
-CachedSurfaceView::~CachedSurfaceView() = default;
+/// Stores the storage and map flags that RequestBuffer() later applies to every buffer it creates.
+TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
+ : storage_flags{storage_flags_}, map_flags{map_flags_} {}
-void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const {
- ASSERT(params.num_levels == 1);
+/// Defaulted destructor: no explicit teardown beyond destroying the members.
+TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default;
- if (params.target == SurfaceTarget::Texture3D) {
- if (params.num_layers > 1) {
- ASSERT(params.base_layer == 0);
- glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level);
- } else {
- glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle,
- params.base_level, params.base_layer);
- }
- return;
+/// Obtains a staging buffer of at least `requested_size` bytes and wraps it in an ImageBufferMap
+/// that exposes exactly `requested_size` bytes of its mapping.
+///
+/// When `insert_fence` is true the map is given a pointer to the buffer's sync slot; otherwise
+/// it is given a null sync pointer.
+ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size,
+ bool insert_fence) {
+ const size_t index = RequestBuffer(requested_size);
+ OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
+ return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync);
+}
+
+/// Returns the index of a staging buffer that can hold `requested_size` bytes.
+///
+/// An existing buffer is reused when FindBuffer() finds one; otherwise a new buffer of exactly
+/// `requested_size` bytes is created, mapped and appended, and its index is returned.
+size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
+ if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
+ return *index;
}
- if (params.num_layers > 1) {
- UNIMPLEMENTED_IF(params.base_layer != 0);
- glFramebufferTexture(fb_target, attachment, GetTexture(), 0);
- return;
+ OGLBuffer& buffer = buffers.emplace_back();
+ buffer.Create();
+ // GL_MAP_PERSISTENT_BIT is added to both the storage and the map flags; the mapped pointer is
+ // kept in `maps` for later requests.
+ // NOTE(review): the result of glMapNamedBufferRange is stored without a null check.
+ glNamedBufferStorage(buffer.handle, requested_size, nullptr,
+ storage_flags | GL_MAP_PERSISTENT_BIT);
+ maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
+ map_flags | GL_MAP_PERSISTENT_BIT)));
+
+ syncs.emplace_back();
+ sizes.push_back(requested_size);
+
+ // buffers, maps, syncs and sizes are parallel containers indexed by the same buffer index.
+ ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
+ maps.size() == sizes.size());
+
+ return buffers.size() - 1;
+}
+
+/// Searches the existing staging buffers for the smallest one that is at least `requested_size`
+/// bytes and is not guarded by an unsignaled sync object.
+///
+/// Returns the index of that buffer, or std::nullopt when none qualifies. The sync object of
+/// every signaled candidate that is examined is released, even if a smaller candidate is found
+/// later in the scan.
+std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) {
+ size_t smallest_buffer = std::numeric_limits<size_t>::max();
+ std::optional<size_t> found;
+ const size_t num_buffers = sizes.size();
+ for (size_t index = 0; index < num_buffers; ++index) {
+ const size_t buffer_size = sizes[index];
+ // Skip buffers that are too small or no better than the best candidate so far.
+ if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
+ continue;
+ }
+ // A non-zero sync handle means the buffer was handed out with a fence; it can only be
+ // reused once that fence has signaled.
+ if (syncs[index].handle != 0) {
+ GLint status;
+ glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status);
+ if (status != GL_SIGNALED) {
+ continue;
+ }
+ syncs[index].Release();
+ }
+ smallest_buffer = buffer_size;
+ found = index;
}
+ return found;
+}
- const GLenum view_target = surface.GetTarget();
- const GLuint texture = surface.GetTexture();
- switch (surface.GetSurfaceParams().target) {
- case SurfaceTarget::Texture1D:
- glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level);
+/// Creates the host-side GL object backing a guest image.
+///
+/// Sets the AcceleratedUpload and Converted flags as reported by CanBeAccelerated() and
+/// IsConverted(), selects the GL formats (RGBA8 / sRGB8_ALPHA8 for converted images, the format
+/// tuple otherwise), and allocates immutable storage according to the GL target returned by
+/// ImageTarget(). Buffer images allocate a GL buffer instead of a texture. When a debugging tool
+/// is attached the created object is labelled with VideoCommon::Name().
+Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
+ VAddr cpu_addr_)
+ : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) {
+ if (CanBeAccelerated(runtime, info)) {
+ flags |= ImageFlagBits::AcceleratedUpload;
+ }
+ if (IsConverted(runtime.device, info.format, info.type)) {
+ flags |= ImageFlagBits::Converted;
+ gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
+ gl_store_format = GL_RGBA8;
+ gl_format = GL_RGBA;
+ gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+ } else {
+ const auto& tuple = GetFormatTuple(info.format);
+ gl_internal_format = tuple.internal_format;
+ gl_store_format = tuple.store_format;
+ gl_format = tuple.format;
+ gl_type = tuple.type;
+ }
+ const GLenum target = ImageTarget(info);
+ const GLsizei width = info.size.width;
+ const GLsizei height = info.size.height;
+ const GLsizei depth = info.size.depth;
+ // The host level count is clamped to the number of levels derived from the image width.
+ const int max_host_mip_levels = std::bit_width(info.size.width);
+ const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);
+ const GLsizei num_layers = info.resources.layers;
+ const GLsizei num_samples = info.num_samples;
+
+ // `handle` names the object that receives the debug label below: the texture for every
+ // texture target, the buffer for GL_TEXTURE_BUFFER.
+ GLuint handle = 0;
+ if (target != GL_TEXTURE_BUFFER) {
+ texture.Create(target);
+ handle = texture.handle;
+ }
+ switch (target) {
+ case GL_TEXTURE_1D_ARRAY:
+ glTextureStorage2D(handle, num_levels, gl_store_format, width, num_layers);
break;
- case SurfaceTarget::Texture2D:
- glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level);
+ case GL_TEXTURE_2D_ARRAY:
+ glTextureStorage3D(handle, num_levels, gl_store_format, width, height, num_layers);
break;
- case SurfaceTarget::Texture1DArray:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubemap:
- case SurfaceTarget::TextureCubeArray:
- glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level,
- params.base_layer);
+ case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
+ // TODO: Where should 'fixedsamplelocations' come from?
+ const auto [samples_x, samples_y] = SamplesLog2(info.num_samples);
+ glTextureStorage3DMultisample(handle, num_samples, gl_store_format, width >> samples_x,
+ height >> samples_y, num_layers, GL_FALSE);
+ break;
+ }
+ case GL_TEXTURE_RECTANGLE:
+ glTextureStorage2D(handle, num_levels, gl_store_format, width, height);
+ break;
+ case GL_TEXTURE_3D:
+ glTextureStorage3D(handle, num_levels, gl_store_format, width, height, depth);
+ break;
+ case GL_TEXTURE_BUFFER:
+ buffer.Create();
+ glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
+ // Label the buffer that was just created; without this `handle` stays 0 and the
+ // glObjectLabel(GL_BUFFER, 0, ...) call below would raise a GL error.
+ handle = buffer.handle;
break;
default:
- UNIMPLEMENTED();
+ UNREACHABLE_MSG("Invalid target=0x{:x}", target);
+ break;
+ }
+ if (runtime.device.HasDebuggingToolAttached()) {
+ const std::string name = VideoCommon::Name(*this);
+ glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle,
+ static_cast<GLsizei>(name.size()), name.data());
}
}
-GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source,
- SwizzleSource z_source, SwizzleSource w_source) {
- if (GetSurfaceParams().IsBuffer()) {
- return GetTexture();
- }
- const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
- if (current_swizzle == new_swizzle) {
- return current_view;
- }
- current_swizzle = new_swizzle;
+/// Uploads texel data from a mapped staging buffer into this image.
+///
+/// The staging buffer is bound as GL_PIXEL_UNPACK_BUFFER, the mapped range starting at
+/// `buffer_offset` is flushed, and each entry of `copies` is forwarded to CopyBufferToImage().
+/// The unpack row length and image height are only re-specified when they differ from the
+/// previous copy. The unpack buffer binding is left in place on return.
+void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::BufferImageCopy> copies) {
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle());
+ glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes);
- const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
- OGLTextureView& view = entry->second;
- if (!is_cache_miss) {
- current_view = view.handle;
- return view.handle;
- }
- view = CreateTextureView();
- current_view = view.handle;
+ glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
- std::array swizzle{x_source, y_source, z_source, w_source};
+ // Sentinel values so that the first copy always sets both pixel-store parameters.
+ u32 current_row_length = std::numeric_limits<u32>::max();
+ u32 current_image_height = std::numeric_limits<u32>::max();
- switch (const PixelFormat pixel_format = GetSurfaceParams().pixel_format) {
- case PixelFormat::D24_UNORM_S8_UINT:
- case PixelFormat::D32_FLOAT_S8_UINT:
- case PixelFormat::S8_UINT_D24_UNORM:
- UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
- glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
- GetComponent(pixel_format, x_source == SwizzleSource::R));
-
- // Make sure we sample the first component
- std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) {
- return value == SwizzleSource::G ? SwizzleSource::R : value;
- });
- [[fallthrough]];
- default: {
- const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]),
- GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])};
- glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
- break;
- }
+ for (const VideoCommon::BufferImageCopy& copy : copies) {
+ if (current_row_length != copy.buffer_row_length) {
+ current_row_length = copy.buffer_row_length;
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length);
+ }
+ if (current_image_height != copy.buffer_image_height) {
+ current_image_height = copy.buffer_image_height;
+ glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height);
+ }
+ CopyBufferToImage(copy, buffer_offset);
}
- return view.handle;
}
-OGLTextureView CachedSurfaceView::CreateTextureView() const {
- OGLTextureView texture_view;
- texture_view.Create();
-
- if (target == GL_TEXTURE_3D) {
- glTextureView(texture_view.handle, target, surface.texture.handle, format,
- params.base_level, params.num_levels, 0, 1);
- } else {
- glTextureView(texture_view.handle, target, surface.texture.handle, format,
- params.base_level, params.num_levels, params.base_layer, params.num_layers);
+/// Buffer-image overload: copies each listed range from the staging buffer into this image's GL
+/// buffer with glCopyNamedBufferSubData. `buffer_offset` is added to every source offset.
+void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::BufferCopy> copies) {
+ for (const VideoCommon::BufferCopy& copy : copies) {
+ glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset,
+ copy.dst_offset, copy.size);
}
- ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
-
- return texture_view;
}
-TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_,
- StateTracker& state_tracker_)
- : TextureCacheBase{rasterizer_, maxwell3d_, gpu_memory_, device_.HasASTC()},
- state_tracker{state_tracker_} {
- src_framebuffer.Create();
- dst_framebuffer.Create();
-}
+/// Downloads texel data from this image into a mapped staging buffer.
+///
+/// After a pixel-buffer memory barrier, the staging buffer is bound as GL_PIXEL_PACK_BUFFER and
+/// each entry of `copies` is forwarded to CopyImageToBuffer(). The pack row length and image
+/// height are only re-specified when they differ from the previous copy. The pack buffer binding
+/// is left in place on return.
+void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::BufferImageCopy> copies) {
+ glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
-TextureCacheOpenGL::~TextureCacheOpenGL() = default;
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle());
+ glPixelStorei(GL_PACK_ALIGNMENT, 1);
-Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
- return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported);
-}
+ // Sentinel values so that the first copy always sets both pixel-store parameters.
+ u32 current_row_length = std::numeric_limits<u32>::max();
+ u32 current_image_height = std::numeric_limits<u32>::max();
-void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
- const VideoCommon::CopyParams& copy_params) {
- const auto& src_params = src_surface->GetSurfaceParams();
- const auto& dst_params = dst_surface->GetSurfaceParams();
- if (src_params.type != dst_params.type) {
- // A fallback is needed
- return;
+ for (const VideoCommon::BufferImageCopy& copy : copies) {
+ if (current_row_length != copy.buffer_row_length) {
+ current_row_length = copy.buffer_row_length;
+ glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
+ }
+ if (current_image_height != copy.buffer_image_height) {
+ current_image_height = copy.buffer_image_height;
+ glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
+ }
+ CopyImageToBuffer(copy, buffer_offset);
}
- const auto src_handle = src_surface->GetTexture();
- const auto src_target = src_surface->GetTarget();
- const auto dst_handle = dst_surface->GetTexture();
- const auto dst_target = dst_surface->GetTarget();
- glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x,
- copy_params.source_y, copy_params.source_z, dst_handle, dst_target,
- copy_params.dest_level, copy_params.dest_x, copy_params.dest_y,
- copy_params.dest_z, copy_params.width, copy_params.height,
- copy_params.depth);
}
-void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
- const Tegra::Engines::Fermi2D::Config& copy_config) {
- const auto& src_params{src_view->GetSurfaceParams()};
- const auto& dst_params{dst_view->GetSurfaceParams()};
- UNIMPLEMENTED_IF(src_params.depth != 1);
- UNIMPLEMENTED_IF(dst_params.depth != 1);
-
- state_tracker.NotifyScissor0();
- state_tracker.NotifyFramebuffer();
- state_tracker.NotifyRasterizeEnable();
- state_tracker.NotifyFramebufferSRGB();
+/// Uploads a single BufferImageCopy region into this image's texture.
+///
+/// The data argument passed to GL is the integer `copy.buffer_offset + buffer_offset` cast to a
+/// pointer, i.e. an offset rather than a host address; UploadMemory() binds the pixel unpack
+/// buffer before calling this. 1D images are addressed with the layer as the Y coordinate,
+/// 2D/Linear images with the layer as the Z coordinate, and 3D images with real depth.
+void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) {
+ // Compressed formats don't have a pixel format or type
+ const bool is_compressed = gl_format == GL_NONE;
+ const void* const offset = reinterpret_cast<const void*>(copy.buffer_offset + buffer_offset);
- if (dst_params.srgb_conversion) {
- glEnable(GL_FRAMEBUFFER_SRGB);
- } else {
- glDisable(GL_FRAMEBUFFER_SRGB);
+ switch (info.type) {
+ case ImageType::e1D:
+ if (is_compressed) {
+ glCompressedTextureSubImage2D(texture.handle, copy.image_subresource.base_level,
+ copy.image_offset.x, copy.image_subresource.base_layer,
+ copy.image_extent.width,
+ copy.image_subresource.num_layers, gl_internal_format,
+ static_cast<GLsizei>(copy.buffer_size), offset);
+ } else {
+ glTextureSubImage2D(texture.handle, copy.image_subresource.base_level,
+ copy.image_offset.x, copy.image_subresource.base_layer,
+ copy.image_extent.width, copy.image_subresource.num_layers,
+ gl_format, gl_type, offset);
+ }
+ break;
+ case ImageType::e2D:
+ case ImageType::Linear:
+ if (is_compressed) {
+ glCompressedTextureSubImage3D(
+ texture.handle, copy.image_subresource.base_level, copy.image_offset.x,
+ copy.image_offset.y, copy.image_subresource.base_layer, copy.image_extent.width,
+ copy.image_extent.height, copy.image_subresource.num_layers, gl_internal_format,
+ static_cast<GLsizei>(copy.buffer_size), offset);
+ } else {
+ glTextureSubImage3D(texture.handle, copy.image_subresource.base_level,
+ copy.image_offset.x, copy.image_offset.y,
+ copy.image_subresource.base_layer, copy.image_extent.width,
+ copy.image_extent.height, copy.image_subresource.num_layers,
+ gl_format, gl_type, offset);
+ }
+ break;
+ case ImageType::e3D:
+ if (is_compressed) {
+ glCompressedTextureSubImage3D(
+ texture.handle, copy.image_subresource.base_level, copy.image_offset.x,
+ copy.image_offset.y, copy.image_offset.z, copy.image_extent.width,
+ copy.image_extent.height, copy.image_extent.depth, gl_internal_format,
+ static_cast<GLsizei>(copy.buffer_size), offset);
+ } else {
+ glTextureSubImage3D(texture.handle, copy.image_subresource.base_level,
+ copy.image_offset.x, copy.image_offset.y, copy.image_offset.z,
+ copy.image_extent.width, copy.image_extent.height,
+ copy.image_extent.depth, gl_format, gl_type, offset);
+ }
+ break;
+ default:
+ UNREACHABLE();
}
- glDisable(GL_RASTERIZER_DISCARD);
- glDisablei(GL_SCISSOR_TEST, 0);
-
- glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle);
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle);
-
- GLenum buffers = 0;
- if (src_params.type == SurfaceType::ColorTexture) {
- src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
-
- dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
-
- buffers = GL_COLOR_BUFFER_BIT;
- } else if (src_params.type == SurfaceType::Depth) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+}
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+/// Reads back a single BufferImageCopy region from this image's texture.
+///
+/// The destination argument passed to GL is the integer `copy.buffer_offset + buffer_offset`
+/// cast to a pointer, i.e. an offset rather than a host address; DownloadMemory() binds the pixel
+/// pack buffer before calling this. The Y/Z offsets and height/depth are filled in per image
+/// type: 1D images use the layer range as Y, 2D/Linear images use it as Z, 3D images use the
+/// real Z offset and depth.
+void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) {
+ const GLint x_offset = copy.image_offset.x;
+ const GLsizei width = copy.image_extent.width;
- buffers = GL_DEPTH_BUFFER_BIT;
- } else if (src_params.type == SurfaceType::DepthStencil) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER);
+ const GLint level = copy.image_subresource.base_level;
+ const GLsizei buffer_size = static_cast<GLsizei>(copy.buffer_size);
+ void* const offset = reinterpret_cast<void*>(copy.buffer_offset + buffer_offset);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+ // Defaults describe a single row / single slice; the switch below overrides what applies.
+ GLint y_offset = 0;
+ GLint z_offset = 0;
+ GLsizei height = 1;
+ GLsizei depth = 1;
- buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+ switch (info.type) {
+ case ImageType::e1D:
+ y_offset = copy.image_subresource.base_layer;
+ height = copy.image_subresource.num_layers;
+ break;
+ case ImageType::e2D:
+ case ImageType::Linear:
+ y_offset = copy.image_offset.y;
+ z_offset = copy.image_subresource.base_layer;
+ height = copy.image_extent.height;
+ depth = copy.image_subresource.num_layers;
+ break;
+ case ImageType::e3D:
+ y_offset = copy.image_offset.y;
+ z_offset = copy.image_offset.z;
+ height = copy.image_extent.height;
+ depth = copy.image_extent.depth;
+ break;
+ default:
+ UNREACHABLE();
+ }
+ // Compressed formats don't have a pixel format or type
+ const bool is_compressed = gl_format == GL_NONE;
+ if (is_compressed) {
+ glGetCompressedTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width,
+ height, depth, buffer_size, offset);
+ } else {
+ glGetTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, height,
+ depth, gl_format, gl_type, buffer_size, offset);
}
-
- const Common::Rectangle<u32>& src_rect = copy_config.src_rect;
- const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect;
- const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
-
- glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top),
- static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom),
- static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top),
- static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom),
- buffers,
- is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);
}
-void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
- MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
- const auto& src_params = src_surface->GetSurfaceParams();
- const auto& dst_params = dst_surface->GetSurfaceParams();
- UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
+/// Creates the GL texture view(s) for `info` on top of `image`.
+///
+/// `views` starts out as a copy of the runtime's null image views. The view format is RGBA8 or
+/// sRGB8_ALPHA8 when the image carries the Converted flag, otherwise the internal format from the
+/// format tuple. For 1D, 2D and cube view types two views are set up - the non-array variant over
+/// `flatten_range` and the array variant over the full range - while 3D, Rect, Buffer and sliced
+/// 2D views get a single view. The default handle is the one matching `info.type`.
+ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
+ ImageId image_id_, Image& image)
+ : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
+ const Device& device = runtime.device;
+ if (True(image.flags & ImageFlagBits::Converted)) {
+ internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
+ } else {
+ internal_format = GetFormatTuple(format).internal_format;
+ }
+ // flatten_range is the range used for the non-array variant: when the requested type is an
+ // array type, its layer count is reduced to 1 (6 for cubes) by the cases below.
+ VideoCommon::SubresourceRange flatten_range = info.range;
+ std::array<GLuint, 2> handles;
+ stored_views.reserve(2);
- const auto source_format = GetFormatTuple(src_params.pixel_format);
- const auto dest_format = GetFormatTuple(dst_params.pixel_format);
+ switch (info.type) {
+ case ImageViewType::e1DArray:
+ flatten_range.extent.layers = 1;
+ [[fallthrough]];
+ case ImageViewType::e1D:
+ glGenTextures(2, handles.data());
+ SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range);
+ SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
+ break;
+ case ImageViewType::e2DArray:
+ flatten_range.extent.layers = 1;
+ [[fallthrough]];
+ case ImageViewType::e2D:
+ if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) {
+ // 2D and 2D array views on a 3D textures are used exclusively for render targets
+ ASSERT(info.range.extent.levels == 1);
+ const VideoCommon::SubresourceRange slice_range{
+ .base = {.level = info.range.base.level, .layer = 0},
+ .extent = {.levels = 1, .layers = 1},
+ };
+ glGenTextures(1, handles.data());
+ SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range);
+ break;
+ }
+ glGenTextures(2, handles.data());
+ SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
+ SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
+ break;
+ case ImageViewType::e3D:
+ glGenTextures(1, handles.data());
+ SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
+ break;
+ case ImageViewType::CubeArray:
+ flatten_range.extent.layers = 6;
+ [[fallthrough]];
+ case ImageViewType::Cube:
+ glGenTextures(2, handles.data());
+ SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range);
+ SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
+ break;
+ case ImageViewType::Rect:
+ glGenTextures(1, handles.data());
+ SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
+ break;
+ case ImageViewType::Buffer:
+ // Buffer views are the only case that uses glCreateTextures: SetupView attaches a buffer
+ // range to the handle instead of calling glTextureView on it.
+ glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data());
+ SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range);
+ break;
+ }
+ default_handle = Handle(info.type);
+}
- const std::size_t source_size = src_surface->GetHostSizeInBytes();
- const std::size_t dest_size = dst_surface->GetHostSizeInBytes();
+// Null image view: creates no textures, every slot of `views` holds the handle copied from
+// runtime.null_image_views.
+ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
+ : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
- const std::size_t buffer_size = std::max(source_size, dest_size);
+// Turns `handle` into a view of `image` for `view_type` and records it in this object.
+// Buffer view infos bind the image's buffer storage; every other type becomes a texture view
+// over `view_range` of the image's texture.
+void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type,
+                          GLuint handle, const VideoCommon::ImageViewInfo& info,
+                          VideoCommon::SubresourceRange view_range) {
+    const bool is_buffer_view = info.type == ImageViewType::Buffer;
+    if (!is_buffer_view) {
+        const GLenum view_target = ImageTarget(view_type, image.info.num_samples);
+        glTextureView(handle, view_target, image.texture.handle, internal_format,
+                      view_range.base.level, view_range.extent.levels, view_range.base.layer,
+                      view_range.extent.layers);
+        // Render targets are left without a swizzle
+        if (!info.IsRenderTarget()) {
+            ApplySwizzle(handle, format, info.Swizzle());
+        }
+    } else {
+        // TODO: Take offset from buffer cache
+        glTextureBufferRange(handle, internal_format, image.buffer.handle, 0,
+                             image.guest_size_bytes);
+    }
+    // Object labels are only emitted when a debugging tool is attached
+    if (device.HasDebuggingToolAttached()) {
+        const std::string label = VideoCommon::Name(*this, view_type);
+        glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(label.size()), label.data());
+    }
+    // Publish the handle for lookups by type and keep it in stored_views
+    views[static_cast<size_t>(view_type)] = handle;
+    stored_views.emplace_back().handle = handle;
+}
- GLuint copy_pbo_handle = FetchPBO(buffer_size);
+// Creates the host sampler object described by a guest TSC entry.
+Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
+    const GLenum depth_compare_mode =
+        config.depth_compare_enabled ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE;
+    const GLenum depth_compare_func = MaxwellToGL::DepthCompareFunc(config.depth_compare_func);
+    const GLenum mag_filter =
+        MaxwellToGL::TextureFilterMode(config.mag_filter, TextureMipmapFilter::None);
+    const GLenum min_filter =
+        MaxwellToGL::TextureFilterMode(config.min_filter, config.mipmap_filter);
+    const GLenum reduction_mode = MaxwellToGL::ReductionFilter(config.reduction_filter);
+    const GLint seamless_mode = config.cubemap_interface_filtering ? GL_TRUE : GL_FALSE;
+
+    UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1);
+    UNIMPLEMENTED_IF(config.float_coord_normalization != 0);
+
+    // Each optional feature may be exposed by either of two extensions
+    const bool has_anisotropy =
+        GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic;
+    const bool has_minmax = GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax;
+    const bool has_seamless_per_texture =
+        GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture;
+
+    sampler.Create();
+    const GLuint id = sampler.handle;
+    glSamplerParameteri(id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u));
+    glSamplerParameteri(id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v));
+    glSamplerParameteri(id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p));
+    glSamplerParameteri(id, GL_TEXTURE_COMPARE_MODE, depth_compare_mode);
+    glSamplerParameteri(id, GL_TEXTURE_COMPARE_FUNC, depth_compare_func);
+    glSamplerParameteri(id, GL_TEXTURE_MAG_FILTER, mag_filter);
+    glSamplerParameteri(id, GL_TEXTURE_MIN_FILTER, min_filter);
+    glSamplerParameterf(id, GL_TEXTURE_LOD_BIAS, config.LodBias());
+    glSamplerParameterf(id, GL_TEXTURE_MIN_LOD, config.MinLod());
+    glSamplerParameterf(id, GL_TEXTURE_MAX_LOD, config.MaxLod());
+    glSamplerParameterfv(id, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
+
+    if (!has_anisotropy) {
+        LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
+    } else {
+        glSamplerParameterf(id, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy());
+    }
+    if (has_minmax) {
+        glSamplerParameteri(id, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_mode);
+    } else if (reduction_mode != GL_WEIGHTED_AVERAGE_ARB) {
+        // Only a request for a mode other than weighted average is reported
+        LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required");
+    }
+    if (has_seamless_per_texture) {
+        glSamplerParameteri(id, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless_mode);
+    } else if (seamless_mode == GL_FALSE) {
+        // Without per-sampler control the renderer falls back to enabling seamless cubemaps
+        // globally, so only a sampler asking for non-seamless filtering cannot be honored
+        LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required");
+    }
+}
- glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
+// Creates a framebuffer object with the given color and depth attachments and records in
+// buffer_bits which buffer classes (color, depth, stencil) are present.
+Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
+ ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
+ // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of
+ // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared
+ // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with
+ // mismatching size, this is why core framebuffers are preferred.
+ GLuint handle;
+ glGenFramebuffers(1, &handle);
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, handle);
+
+ GLsizei num_buffers = 0;
+ std::array<GLenum, NUM_RT> gl_draw_buffers;
+ gl_draw_buffers.fill(GL_NONE);
+
+ for (size_t index = 0; index < color_buffers.size(); ++index) {
+ const ImageView* const image_view = color_buffers[index];
+ if (!image_view) {
+ continue;
+ }
+ buffer_bits |= GL_COLOR_BUFFER_BIT;
+ gl_draw_buffers[index] = GL_COLOR_ATTACHMENT0 + key.draw_buffers[index];
+ // One past the highest bound color target; unbound slots below it stay GL_NONE
+ num_buffers = static_cast<GLsizei>(index + 1);
- if (src_surface->IsCompressed()) {
- glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
- nullptr);
- } else {
- glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type,
- static_cast<GLsizei>(source_size), nullptr);
+ const GLenum attachment = static_cast<GLenum>(GL_COLOR_ATTACHMENT0 + index);
+ AttachTexture(handle, attachment, image_view);
}
- glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
+ // Depth-stencil formats contribute both bits to BufferBits(); other formats only depth
+ if (const ImageView* const image_view = depth_buffer; image_view) {
+ if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) {
+ buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+ } else {
+ buffer_bits |= GL_DEPTH_BUFFER_BIT;
+ }
+ const GLenum attachment = AttachmentType(image_view->format);
+ AttachTexture(handle, attachment, image_view);
+ }
- const GLsizei width = static_cast<GLsizei>(dst_params.width);
- const GLsizei height = static_cast<GLsizei>(dst_params.height);
- const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
- if (dst_surface->IsCompressed()) {
- LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
- UNREACHABLE();
+ // The array entry point is only used when more than one draw buffer slot is needed
+ if (num_buffers > 1) {
+ glNamedFramebufferDrawBuffers(handle, num_buffers, gl_draw_buffers.data());
+ } else if (num_buffers > 0) {
+ glNamedFramebufferDrawBuffer(handle, gl_draw_buffers[0]);
} else {
- switch (dst_params.target) {
- case SurfaceTarget::Texture1D:
- glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format,
- dest_format.type, nullptr);
- break;
- case SurfaceTarget::Texture2D:
- glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height,
- dest_format.format, dest_format.type, nullptr);
- break;
- case SurfaceTarget::Texture3D:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubeArray:
- glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
- dest_format.format, dest_format.type, nullptr);
- break;
- case SurfaceTarget::TextureCubemap:
- glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
- dest_format.format, dest_format.type, nullptr);
- break;
- default:
- LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", dst_params.target);
- UNREACHABLE();
- }
+ glNamedFramebufferDrawBuffer(handle, GL_NONE);
}
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
- glTextureBarrier();
-}
+ glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_WIDTH, key.size.width);
+ glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_HEIGHT, key.size.height);
+ // TODO
+ // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_LAYERS, ...);
+ // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_SAMPLES, ...);
+ // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS, ...);
-GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) {
- ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; });
- const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size));
- OGLBuffer& cp = copy_pbo_cache[l2];
- if (cp.handle == 0) {
- const std::size_t ceil_size = 1ULL << l2;
- cp.Create();
- cp.MakeStreamCopy(ceil_size);
+ // Object labels are only emitted when a debugging tool is attached
+ if (runtime.device.HasDebuggingToolAttached()) {
+ const std::string name = VideoCommon::Name(key);
+ glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data());
}
- return cp.handle;
+ // Store the finished object in the OGLFramebuffer member
+ framebuffer.handle = handle;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 72b284fab..04193e31e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -4,157 +4,247 @@
#pragma once
-#include <array>
-#include <functional>
#include <memory>
-#include <unordered_map>
-#include <utility>
-#include <vector>
+#include <span>
#include <glad/glad.h>
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/texture_cache.h"
namespace OpenGL {
-using VideoCommon::SurfaceParams;
-using VideoCommon::ViewParams;
-
-class CachedSurfaceView;
-class CachedSurface;
-class TextureCacheOpenGL;
+class Device;
+class ProgramManager;
class StateTracker;
-using Surface = std::shared_ptr<CachedSurface>;
-using View = std::shared_ptr<CachedSurfaceView>;
-using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
+class Framebuffer;
+class Image;
+class ImageView;
+class Sampler;
-class CachedSurface final : public VideoCommon::SurfaceBase<View> {
- friend CachedSurfaceView;
+using VideoCommon::ImageId;
+using VideoCommon::ImageViewId;
+using VideoCommon::ImageViewType;
+using VideoCommon::NUM_RT;
+using VideoCommon::Offset2D;
+using VideoCommon::RenderTargets;
+// Result of mapping a staging buffer (see TextureCacheRuntime::MapUploadBuffer and
+// MapDownloadBuffer): a GL buffer handle plus a byte span.
+// NOTE(review): the constructor and destructor are defined elsewhere; how `map`, `size` and
+// `sync` are used is not visible here.
+class ImageBufferMap {
public:
- explicit CachedSurface(GPUVAddr gpu_addr_, const SurfaceParams& params_,
- bool is_astc_supported_);
- ~CachedSurface();
-
- void UploadTexture(const std::vector<u8>& staging_buffer) override;
- void DownloadTexture(std::vector<u8>& staging_buffer) override;
+ explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
+ ~ImageBufferMap();
- GLenum GetTarget() const {
- return target;
+ // Returns the stored GL handle
+ GLuint Handle() const noexcept {
+ return handle;
}
- GLuint GetTexture() const {
- return texture.handle;
+ // Returns the stored byte span
+ std::span<u8> Span() const noexcept {
+ return span;
}
- bool IsCompressed() const {
- return is_compressed;
+private:
+ std::span<u8> span;
+ OGLSync* sync;
+ GLuint handle;
+};
+
+// Per-format properties returned by TextureCacheRuntime::FormatInfo.
+// NOTE(review): the field meanings below are inferred from their names; confirm against the
+// code that fills this struct.
+struct FormatProperties {
+ GLenum compatibility_class; // presumably the GL view compatibility class - TODO confirm
+ bool compatibility_by_size;
+ bool is_compressed; // presumably true for block-compressed formats - TODO confirm
+};
+
+// OpenGL backend state used by the texture cache. Framebuffer, Image, ImageView and Sampler are
+// friends and read its private members directly.
+class TextureCacheRuntime {
+ friend Framebuffer;
+ friend Image;
+ friend ImageView;
+ friend Sampler;
+
+public:
+ explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
+ StateTracker& state_tracker);
+ ~TextureCacheRuntime();
+
+ void Finish();
+
+ ImageBufferMap MapUploadBuffer(size_t size);
+
+ ImageBufferMap MapDownloadBuffer(size_t size);
+
+ void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
+
+ // Not implemented on this backend; calling it only reports UNIMPLEMENTED
+ void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
+ UNIMPLEMENTED();
}
-protected:
- void DecorateSurfaceName() override;
+ bool CanImageBeCopied(const Image& dst, const Image& src);
+
+ void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
+
+ void BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
+ const std::array<Offset2D, 2>& dst_region,
+ const std::array<Offset2D, 2>& src_region,
+ Tegra::Engines::Fermi2D::Filter filter,
+ Tegra::Engines::Fermi2D::Operation operation);
- View CreateView(const ViewParams& view_key) override;
- View CreateViewInner(const ViewParams& view_key, bool is_proxy);
+ void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::SwizzleParameters> swizzles);
+
+ void InsertUploadMemoryBarrier();
+
+ FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
private:
- void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer);
+ // Pool of staging buffers created with one fixed pair of storage and map flags.
+ // NOTE(review): syncs, buffers, maps and sizes look like parallel arrays indexed by
+ // buffer slot - confirm against the implementation.
+ struct StagingBuffers {
+ explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
+ ~StagingBuffers();
- GLenum internal_format{};
- GLenum format{};
- GLenum type{};
- bool is_compressed{};
- GLenum target{};
- u32 view_count{};
+ ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
- OGLTexture texture;
- OGLBuffer texture_buffer;
+ size_t RequestBuffer(size_t requested_size);
+
+ std::optional<size_t> FindBuffer(size_t requested_size);
+
+ std::vector<OGLSync> syncs;
+ std::vector<OGLBuffer> buffers;
+ std::vector<u8*> maps;
+ std::vector<size_t> sizes;
+ GLenum storage_flags;
+ GLenum map_flags;
+ };
+
+ const Device& device;
+ StateTracker& state_tracker;
+ UtilShaders util_shaders;
+
+ // NOTE(review): presumably one table per image type queried by FormatInfo - confirm
+ std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
+
+ // Upload maps are write-only with explicit flushing; download maps are read-only
+ StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
+ StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT};
+
+ OGLTexture null_image_1d_array;
+ OGLTexture null_image_cube_array;
+ OGLTexture null_image_3d;
+ OGLTexture null_image_rect;
+ OGLTextureView null_image_view_1d;
+ OGLTextureView null_image_view_2d;
+ OGLTextureView null_image_view_2d_array;
+ OGLTextureView null_image_view_cube;
+
+ // Copied into every ImageView as the initial contents of its per-type handle table
+ std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
};
-class CachedSurfaceView final : public VideoCommon::ViewBase {
+// OpenGL image of the texture cache. ImageView is a friend and reads `texture` and `buffer`
+// directly when creating its views.
+class Image : public VideoCommon::ImageBase {
+ friend ImageView;
+
public:
- explicit CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, bool is_proxy_);
- ~CachedSurfaceView();
+ explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
+ VAddr cpu_addr);
- /// @brief Attaches this texture view to the currently bound fb_target framebuffer
- /// @param attachment Attachment to bind textures to
- /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
- void Attach(GLenum attachment, GLenum fb_target) const;
+ void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::BufferImageCopy> copies);
- GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source);
+ void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::BufferCopy> copies);
- void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix);
+ void DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::BufferImageCopy> copies);
- void MarkAsModified(u64 tick) {
- surface.MarkAsModified(true, tick);
+ // Raw handle of the backing GL texture
+ GLuint Handle() const noexcept {
+ return texture.handle;
}
- GLuint GetTexture() const {
- if (is_proxy) {
- return surface.GetTexture();
- }
- return main_view.handle;
+private:
+ void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
+
+ void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
+
+ OGLTexture texture;
+ OGLTextureView store_view;
+ // Storage that ImageView::SetupView binds for buffer views
+ OGLBuffer buffer;
+ GLenum gl_internal_format = GL_NONE;
+ GLenum gl_store_format = GL_NONE;
+ GLenum gl_format = GL_NONE;
+ GLenum gl_type = GL_NONE;
+};
+
+// OpenGL image view: a table of texture handles indexed by ImageViewType.
+class ImageView : public VideoCommon::ImageViewBase {
+ friend Image;
+
+public:
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
+
+ // Returns the handle stored for `query_type`
+ [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
+ return views[static_cast<size_t>(query_type)];
}
- GLenum GetFormat() const {
- return format;
+ // Returns the handle cached for the view type this object was created with
+ [[nodiscard]] GLuint DefaultHandle() const noexcept {
+ return default_handle;
}
- const SurfaceParams& GetSurfaceParams() const {
- return surface.GetSurfaceParams();
+ // Returns the GL internal format used for the views
+ [[nodiscard]] GLenum Format() const noexcept {
+ return internal_format;
}
private:
- OGLTextureView CreateTextureView() const;
+ void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
+ const VideoCommon::ImageViewInfo& info,
+ VideoCommon::SubresourceRange view_range);
+
+ // Per-type handles; SetupView overwrites the entries it creates
+ std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
+ // One entry is appended per SetupView call
+ std::vector<OGLTextureView> stored_views;
+ GLuint default_handle = 0;
+ GLenum internal_format = GL_NONE;
+};
+
+class ImageAlloc : public VideoCommon::ImageAllocBase {};
- CachedSurface& surface;
- const GLenum format;
- const GLenum target;
- const bool is_proxy;
+// OpenGL sampler object built from a guest TSC entry.
+class Sampler {
+public:
+ explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
- std::unordered_map<u32, OGLTextureView> view_cache;
- OGLTextureView main_view;
+ // Raw handle of the GL sampler object
+ GLuint Handle() const noexcept {
+ return sampler.handle;
+ }
- // Use an invalid default so it always fails the comparison test
- u32 current_swizzle = 0xffffffff;
- GLuint current_view = 0;
+private:
+ OGLSampler sampler;
};
-class TextureCacheOpenGL final : public TextureCacheBase {
+// OpenGL framebuffer object built from a set of color views and an optional depth view.
+class Framebuffer {
public:
- explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_,
- StateTracker& state_tracker);
- ~TextureCacheOpenGL();
-
-protected:
- Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
-
- void ImageCopy(Surface& src_surface, Surface& dst_surface,
- const VideoCommon::CopyParams& copy_params) override;
+ explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
+ ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
- void ImageBlit(View& src_view, View& dst_view,
- const Tegra::Engines::Fermi2D::Config& copy_config) override;
+ // Raw handle of the GL framebuffer object
+ [[nodiscard]] GLuint Handle() const noexcept {
+ return framebuffer.handle;
+ }
- void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
+ // Mask of GL_*_BUFFER_BIT values accumulated by the constructor for the attached buffers
+ [[nodiscard]] GLbitfield BufferBits() const noexcept {
+ return buffer_bits;
+ }
private:
- GLuint FetchPBO(std::size_t buffer_size);
-
- StateTracker& state_tracker;
+ OGLFramebuffer framebuffer;
+ GLbitfield buffer_bits = GL_NONE;
+};
- OGLFramebuffer src_framebuffer;
- OGLFramebuffer dst_framebuffer;
- std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
+// Traits bundle that instantiates VideoCommon::TextureCache with the OpenGL backend types.
+// NOTE(review): the effect of each flag is defined by the common texture cache - confirm there.
+struct TextureCacheParams {
+ static constexpr bool ENABLE_VALIDATION = true;
+ static constexpr bool FRAMEBUFFER_BLITS = true;
+ static constexpr bool HAS_EMULATED_COPIES = true;
+
+ using Runtime = OpenGL::TextureCacheRuntime;
+ using Image = OpenGL::Image;
+ using ImageAlloc = OpenGL::ImageAlloc;
+ using ImageView = OpenGL::ImageView;
+ using Sampler = OpenGL::Sampler;
+ using Framebuffer = OpenGL::Framebuffer;
};
+using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index dd4ee3361..cbccfdeb4 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -475,6 +475,19 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
return GL_FILL;
}
+/// Translates a guest sampler reduction mode into the matching OpenGL reduction mode enum.
+/// Values outside the known set are reported and treated as weighted average.
+inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) {
+    using Tegra::Texture::SamplerReduction;
+    if (filter == SamplerReduction::WeightedAverage) {
+        return GL_WEIGHTED_AVERAGE_ARB;
+    }
+    if (filter == SamplerReduction::Min) {
+        return GL_MIN;
+    }
+    if (filter == SamplerReduction::Max) {
+        return GL_MAX;
+    }
+    UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter));
+    return GL_WEIGHTED_AVERAGE_ARB;
+}
+
inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
// Enumeration order matches register order. We can convert it arithmetically.
return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index cbfaaa99c..dd77a543c 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -23,10 +23,10 @@
#include "core/telemetry_session.h"
#include "video_core/host_shaders/opengl_present_frag.h"
#include "video_core/host_shaders/opengl_present_vert.h"
-#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core/textures/decoders.h"
namespace OpenGL {
@@ -140,11 +140,10 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (!framebuffer) {
return;
}
-
PrepareRendertarget(framebuffer);
RenderScreenshot();
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+ state_tracker.BindFramebuffer(0);
DrawScreen(emu_window.GetFramebufferLayout());
++m_current_frame;
@@ -187,19 +186,20 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
// Reset the screen info's display texture to its own permanent texture
screen_info.display_texture = screen_info.texture.resource.handle;
- const auto pixel_format{
- VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
- const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
- const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
- u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
- rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
-
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
- VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
- framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
- gl_framebuffer_data.data(), host_ptr);
-
+ const auto pixel_format{
+ VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
+ const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
+ const u64 size_in_bytes{Tegra::Texture::CalculateSize(
+ true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
+ const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
+ const std::span<const u8> input_data(host_ptr, size_in_bytes);
+ Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
+ framebuffer.width, framebuffer.height, 1, block_height_log2,
+ 0);
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
// Update existing texture
@@ -238,6 +238,10 @@ void RendererOpenGL::InitOpenGLObjects() {
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
+ // Generate presentation sampler
+ present_sampler.Create();
+ glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+
// Generate VBO handle for drawing
vertex_buffer.Create();
@@ -255,6 +259,11 @@ void RendererOpenGL::InitOpenGLObjects() {
// Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
+ // Enable seamless cubemaps when per texture parameters are not available
+ if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
+ glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
+ }
+
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
if (device.HasVertexBufferUnifiedMemory()) {
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
@@ -296,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
const auto pixel_format{
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
- const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
+ const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
GLint internal_format;
@@ -315,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
- UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
- static_cast<u32>(framebuffer.pixel_format));
+ // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
+ // static_cast<u32>(framebuffer.pixel_format));
}
texture.resource.Release();
@@ -382,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
state_tracker.NotifyPolygonModes();
state_tracker.NotifyViewport0();
state_tracker.NotifyScissor0();
- state_tracker.NotifyColorMask0();
+ state_tracker.NotifyColorMask(0);
state_tracker.NotifyBlend0();
state_tracker.NotifyFramebuffer();
state_tracker.NotifyFrontFace();
@@ -440,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
}
glBindTextureUnit(0, screen_info.display_texture);
- glBindSampler(0, 0);
+ glBindSampler(0, present_sampler.handle);
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
@@ -473,6 +482,8 @@ void RendererOpenGL::RenderScreenshot() {
DrawScreen(layout);
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+ glPixelStorei(GL_PACK_ROW_LENGTH, 0);
glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
renderer_settings.screenshot_bits);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 376f88766..44e109794 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -102,6 +102,7 @@ private:
StateTracker state_tracker{gpu};
// OpenGL object IDs
+ OGLSampler present_sampler;
OGLBuffer vertex_buffer;
OGLProgram vertex_program;
OGLProgram fragment_program;
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
new file mode 100644
index 000000000..eb849cbf2
--- /dev/null
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -0,0 +1,224 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <bit>
+#include <span>
+#include <string_view>
+
+#include <glad/glad.h>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
+#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
+#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
+#include "video_core/host_shaders/pitch_unswizzle_comp.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
+#include "video_core/renderer_opengl/util_shaders.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/accelerated_swizzle.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/texture_cache/util.h"
+#include "video_core/textures/decoders.h"
+
+namespace OpenGL {
+
+using namespace HostShaders;
+
+using VideoCommon::Extent3D;
+using VideoCommon::ImageCopy;
+using VideoCommon::ImageType;
+using VideoCommon::SwizzleParameters;
+using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams;
+using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
+using VideoCore::Surface::BytesPerBlock;
+
+namespace {
+
+/// Compiles `source` as a compute shader and creates a program object from it.
+OGLProgram MakeProgram(std::string_view source) {
+    OGLShader compute_shader;
+    compute_shader.Create(source, GL_COMPUTE_SHADER);
+
+    OGLProgram result;
+    result.Create(true, false, compute_shader.handle);
+    return result;
+}
+
+} // Anonymous namespace
+
+/// Builds the utility compute programs and uploads the swizzle table into an immutable buffer.
+UtilShaders::UtilShaders(ProgramManager& program_manager_)
+    : program_manager{program_manager_},
+      block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
+      block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
+      pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
+      copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
+    swizzle_table_buffer.Create();
+
+    // No storage flags are passed: the table is written once here
+    const auto table = Tegra::Texture::MakeSwizzleTable();
+    glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(table), &table, 0);
+}
+
+UtilShaders::~UtilShaders() = default;
+
+/// Uploads block linear data from the mapped staging buffer into `image` by dispatching the 2D
+/// unswizzle compute program once per entry of `swizzles`.
+void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+                                      std::span<const SwizzleParameters> swizzles) {
+    static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
+    static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
+    static constexpr GLuint BINDING_INPUT_BUFFER = 1;
+    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
+
+    const GLuint staging_buffer = map.Handle();
+
+    program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
+    glFlushMappedNamedBufferRange(staging_buffer, buffer_offset, image.guest_size_bytes);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
+
+    const GLenum image_format = StoreFormat(BytesPerBlock(image.info.format));
+    for (const SwizzleParameters& swizzle : swizzles) {
+        // Shader parameters for this swizzle entry
+        const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
+        glUniform3uiv(0, 1, params.origin.data());
+        glUniform3iv(1, 1, params.destination.data());
+        glUniform1ui(2, params.bytes_per_block_log2);
+        glUniform1ui(3, params.layer_stride);
+        glUniform1ui(4, params.block_size);
+        glUniform1ui(5, params.x_shift);
+        glUniform1ui(6, params.block_height);
+        glUniform1ui(7, params.block_height_mask);
+
+        // Input starts at this entry's offset inside the staging buffer
+        const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+        const auto input_size = image.guest_size_bytes - swizzle.buffer_offset;
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, staging_buffer,
+                          input_offset, input_size);
+        glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
+                           GL_WRITE_ONLY, image_format);
+
+        // One workgroup grid over the tiles, one Z slice per image layer
+        const u32 groups_x = Common::DivCeil(swizzle.num_tiles.width, WORKGROUP_SIZE.width);
+        const u32 groups_y = Common::DivCeil(swizzle.num_tiles.height, WORKGROUP_SIZE.height);
+        glDispatchCompute(groups_x, groups_y, image.info.resources.layers);
+    }
+    program_manager.RestoreGuestCompute();
+}
+
+/// Uploads block linear data from the mapped staging buffer into `image` by dispatching the 3D
+/// unswizzle compute program once per entry of `swizzles`.
+void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+                                      std::span<const SwizzleParameters> swizzles) {
+    static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
+
+    static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
+    static constexpr GLuint BINDING_INPUT_BUFFER = 1;
+    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
+
+    const GLuint staging_buffer = map.Handle();
+
+    glFlushMappedNamedBufferRange(staging_buffer, buffer_offset, image.guest_size_bytes);
+    program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
+
+    const GLenum image_format = StoreFormat(BytesPerBlock(image.info.format));
+    for (const SwizzleParameters& swizzle : swizzles) {
+        // Shader parameters for this swizzle entry
+        const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info);
+        glUniform3uiv(0, 1, params.origin.data());
+        glUniform3iv(1, 1, params.destination.data());
+        glUniform1ui(2, params.bytes_per_block_log2);
+        glUniform1ui(3, params.slice_size);
+        glUniform1ui(4, params.block_size);
+        glUniform1ui(5, params.x_shift);
+        glUniform1ui(6, params.block_height);
+        glUniform1ui(7, params.block_height_mask);
+        glUniform1ui(8, params.block_depth);
+        glUniform1ui(9, params.block_depth_mask);
+
+        // Input starts at this entry's offset inside the staging buffer
+        const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+        const auto input_size = image.guest_size_bytes - swizzle.buffer_offset;
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, staging_buffer,
+                          input_offset, input_size);
+        glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
+                           GL_WRITE_ONLY, image_format);
+
+        // One workgroup grid covering the tiles in all three dimensions
+        const u32 groups_x = Common::DivCeil(swizzle.num_tiles.width, WORKGROUP_SIZE.width);
+        const u32 groups_y = Common::DivCeil(swizzle.num_tiles.height, WORKGROUP_SIZE.height);
+        const u32 groups_z = Common::DivCeil(swizzle.num_tiles.depth, WORKGROUP_SIZE.depth);
+        glDispatchCompute(groups_x, groups_y, groups_z);
+    }
+    program_manager.RestoreGuestCompute();
+}
+
+/// Uploads a pitch linear image from a mapped buffer.
+///
+/// Binds the pitch unswizzle compute program, flushes the mapped range and issues one dispatch
+/// per entry of @p swizzles. The output image is bound once, at level 0, for all dispatches.
+/// program_manager.RestoreGuestCompute() is called before returning.
+///
+/// @param image         Destination image
+/// @param map           Mapped buffer holding the source data
+/// @param buffer_offset Offset into @p map, added to the buffer offset of every swizzle entry
+/// @param swizzles      Swizzle descriptions, one compute dispatch is issued for each of them
+void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+                              std::span<const SwizzleParameters> swizzles) {
+    static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
+    static constexpr GLuint BINDING_INPUT_BUFFER = 0;
+    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
+    static constexpr GLuint LOC_ORIGIN = 0;
+    static constexpr GLuint LOC_DESTINATION = 1;
+    static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
+    static constexpr GLuint LOC_PITCH = 3;
+
+    const u32 bytes_per_block = BytesPerBlock(image.info.format);
+    const u32 pitch = image.info.pitch;
+
+    // Report unsupported block sizes before asking StoreFormat for a format. StoreFormat only
+    // handles 1, 2, 4, 8 and 16 byte blocks and hits UNREACHABLE() on anything else, which would
+    // otherwise fire ahead of this more descriptive diagnostic.
+    UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
+                         "Non-power of two images are not implemented");
+    const GLenum format = StoreFormat(bytes_per_block);
+
+    program_manager.BindHostCompute(pitch_unswizzle_program.handle);
+    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+
+    // These values do not change between dispatches, set them once
+    glUniform2ui(LOC_ORIGIN, 0, 0);
+    glUniform2i(LOC_DESTINATION, 0, 0);
+    glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
+    glUniform1ui(LOC_PITCH, pitch);
+    glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
+
+    for (const SwizzleParameters& swizzle : swizzles) {
+        const Extent3D num_tiles = swizzle.num_tiles;
+        const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+
+        // Round up so partially filled work groups are still dispatched
+        const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
+        const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
+
+        // Input window: starts at this entry's data and spans the remaining guest bytes
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
+                          input_offset, image.guest_size_bytes - swizzle.buffer_offset);
+        glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
+    }
+    program_manager.RestoreGuestCompute();
+}
+
+/// Copies regions from one image to another with the BC4 copy compute program.
+///
+/// For every entry of @p copies the source level is bound read-only as GL_RG32UI, the
+/// destination level is bound write-only as GL_RGBA8UI, and one dispatch sized by the copy
+/// extent is issued. Each copy is asserted to cover exactly one layer starting at layer zero.
+/// program_manager.RestoreGuestCompute() is called before returning.
+///
+/// @param dst_image Image written by the copies
+/// @param src_image Image read by the copies
+/// @param copies    Regions to copy
+void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) {
+    static constexpr GLuint LOC_SRC_OFFSET = 0;
+    static constexpr GLuint LOC_DST_OFFSET = 1;
+    static constexpr GLuint BINDING_INPUT_IMAGE = 0;
+    static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
+
+    program_manager.BindHostCompute(copy_bc4_program.handle);
+
+    for (const ImageCopy& copy : copies) {
+        // Layered copies are not handled by this path
+        ASSERT(copy.src_subresource.base_layer == 0);
+        ASSERT(copy.src_subresource.num_layers == 1);
+        ASSERT(copy.dst_subresource.base_layer == 0);
+        ASSERT(copy.dst_subresource.num_layers == 1);
+
+        const auto& src_offset = copy.src_offset;
+        const auto& dst_offset = copy.dst_offset;
+        glUniform3ui(LOC_SRC_OFFSET, src_offset.x, src_offset.y, src_offset.z);
+        glUniform3ui(LOC_DST_OFFSET, dst_offset.x, dst_offset.y, dst_offset.z);
+
+        const auto src_level = copy.src_subresource.base_level;
+        const auto dst_level = copy.dst_subresource.base_level;
+        glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), src_level, GL_FALSE, 0,
+                           GL_READ_ONLY, GL_RG32UI);
+        glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(), dst_level, GL_FALSE, 0,
+                           GL_WRITE_ONLY, GL_RGBA8UI);
+
+        // The copy extent is used directly as the work group count
+        const auto& extent = copy.extent;
+        glDispatchCompute(extent.width, extent.height, extent.depth);
+    }
+    program_manager.RestoreGuestCompute();
+}
+
+/// Maps a block size in bytes to the unsigned integer image format of the same size.
+///
+/// @param bytes_per_block Block size in bytes, expected to be 1, 2, 4, 8 or 16
+/// @return The matching format. Any other size hits UNREACHABLE() and yields GL_R8UI.
+GLenum StoreFormat(u32 bytes_per_block) {
+    if (bytes_per_block == 1) {
+        return GL_R8UI;
+    }
+    if (bytes_per_block == 2) {
+        return GL_R16UI;
+    }
+    if (bytes_per_block == 4) {
+        return GL_R32UI;
+    }
+    if (bytes_per_block == 8) {
+        return GL_RG32UI;
+    }
+    if (bytes_per_block == 16) {
+        return GL_RGBA32UI;
+    }
+    // No format is defined for this block size
+    UNREACHABLE();
+    return GL_R8UI;
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
new file mode 100644
index 000000000..359997255
--- /dev/null
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -0,0 +1,51 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <span>
+
+#include <glad/glad.h>
+
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/texture_cache/types.h"
+
+namespace OpenGL {
+
+class Image;
+class ImageBufferMap;
+class ProgramManager;
+
+/// Owns the helper compute programs used to upload buffer data into images and to copy
+/// between images, together with the swizzle table buffer the block linear uploads bind.
+class UtilShaders {
+public:
+ explicit UtilShaders(ProgramManager& program_manager);
+ ~UtilShaders();
+
+ /// Uploads a block linear image, issuing one compute dispatch per swizzle entry.
+ /// @param image         Destination image
+ /// @param map           Mapped buffer holding the source data
+ /// @param buffer_offset Offset into the map, added to each swizzle's own buffer offset
+ /// @param swizzles      Swizzle descriptions, one dispatch each
+ void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::SwizzleParameters> swizzles);
+
+ /// Uploads a block linear 3D image, issuing one compute dispatch per swizzle entry.
+ /// Takes the same parameters as BlockLinearUpload2D.
+ void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::SwizzleParameters> swizzles);
+
+ /// Uploads a pitch linear image, issuing one compute dispatch per swizzle entry.
+ /// Takes the same parameters as BlockLinearUpload2D.
+ void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::SwizzleParameters> swizzles);
+
+ /// Copies regions from src_image to dst_image with the BC4 copy program, one dispatch per
+ /// copy. Each copy is asserted to cover a single layer starting at layer zero.
+ void CopyBC4(Image& dst_image, Image& src_image,
+ std::span<const VideoCommon::ImageCopy> copies);
+
+private:
+ /// Used to bind the host compute programs and to restore the guest compute state afterwards
+ ProgramManager& program_manager;
+
+ /// Bound as a shader storage buffer by the block linear uploads
+ OGLBuffer swizzle_table_buffer;
+
+ // One compute program per operation exposed above
+ OGLProgram block_linear_unswizzle_2d_program;
+ OGLProgram block_linear_unswizzle_3d_program;
+ OGLProgram pitch_unswizzle_program;
+ OGLProgram copy_bc4_program;
+};
+
+/// Returns the unsigned integer image format whose size matches a block of 1, 2, 4, 8 or 16
+/// bytes (GL_R8UI, GL_R16UI, GL_R32UI, GL_RG32UI and GL_RGBA32UI respectively).
+GLenum StoreFormat(u32 bytes_per_block);
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
deleted file mode 100644
index 6d7bb16b2..000000000
--- a/src/video_core/renderer_opengl/utils.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <string>
-#include <vector>
-
-#include <fmt/format.h>
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_state_tracker.h"
-#include "video_core/renderer_opengl/utils.h"
-
-namespace OpenGL {
-
-void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
- if (!GLAD_GL_KHR_debug) {
- // We don't need to throw an error as this is just for debugging
- return;
- }
-
- std::string object_label;
- if (extra_info.empty()) {
- switch (identifier) {
- case GL_TEXTURE:
- object_label = fmt::format("Texture@0x{:016X}", addr);
- break;
- case GL_PROGRAM:
- object_label = fmt::format("Shader@0x{:016X}", addr);
- break;
- default:
- object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr);
- break;
- }
- } else {
- object_label = fmt::format("{}@0x{:016X}", extra_info, addr);
- }
- glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
deleted file mode 100644
index 9c09ee12c..000000000
--- a/src/video_core/renderer_opengl/utils.h
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <string_view>
-#include <vector>
-#include <glad/glad.h>
-#include "common/common_types.h"
-
-namespace OpenGL {
-
-void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
-
-} // namespace OpenGL