summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp132
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp33
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h12
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp69
-rw-r--r--src/video_core/renderer_opengl/gl_device.h18
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp14
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.h12
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.cpp85
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.h68
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp24
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.h12
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp565
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h80
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_sampler_cache.cpp52
-rw-r--r--src/video_core/renderer_opengl/gl_sampler_cache.h25
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp16
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h11
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp75
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h16
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h6
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h15
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp32
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h19
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp1330
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h289
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h43
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp63
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h9
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp224
-rw-r--r--src/video_core/renderer_opengl/util_shaders.h51
-rw-r--r--src/video_core/renderer_opengl/utils.cpp42
-rw-r--r--src/video_core/renderer_opengl/utils.h16
36 files changed, 2009 insertions, 1480 deletions
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index b7e9ed2e9..3e4d88c30 100644
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -39,8 +39,8 @@ using Operation = const OperationNode&;
constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
char Swizzle(std::size_t component) {
- ASSERT(component < 4);
- return component["xyzw"];
+ static constexpr std::string_view SWIZZLE{"xyzw"};
+ return SWIZZLE.at(component);
}
constexpr bool IsGenericAttribute(Attribute::Index index) {
@@ -71,7 +71,7 @@ std::string_view GetInputFlags(PixelImap attribute) {
case PixelImap::Unused:
break;
}
- UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
+ UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
return {};
}
@@ -123,7 +123,7 @@ std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::Primitive
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
return "TRIANGLES_ADJACENCY";
default:
- UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
+ UNIMPLEMENTED_MSG("topology={}", topology);
return "POINTS";
}
}
@@ -137,7 +137,7 @@ std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
case Tegra::Shader::OutputTopology::TriangleStrip:
return "TRIANGLE_STRIP";
default:
- UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
+ UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
return "points";
}
}
@@ -187,8 +187,8 @@ std::string TextureType(const MetaTexture& meta) {
class ARBDecompiler final {
public:
- explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
- ShaderType stage, std::string_view identifier);
+ explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
+ ShaderType stage_, std::string_view identifier);
std::string Code() const {
return shader_source;
@@ -224,7 +224,7 @@ private:
std::string Visit(const Node& node);
- std::pair<std::string, std::size_t> BuildCoords(Operation);
+ std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation);
std::string BuildAoffi(Operation);
std::string GlobalMemoryPointer(const GmemNode& gmem);
void Exit();
@@ -376,9 +376,11 @@ private:
std::string temporary = AllocTemporary();
std::string address;
std::string_view opname;
+ bool robust = false;
if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
address = GlobalMemoryPointer(*gmem);
opname = "ATOM";
+ robust = true;
} else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
opname = "ATOMS";
@@ -386,7 +388,15 @@ private:
UNREACHABLE();
return "{0, 0, 0, 0}";
}
+ if (robust) {
+ AddLine("IF NE.x;");
+ }
AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
+ if (robust) {
+ AddLine("ELSE;");
+ AddLine("MOV.S {}, 0;", temporary);
+ AddLine("ENDIF;");
+ }
return temporary;
}
@@ -792,9 +802,9 @@ private:
};
};
-ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
- ShaderType stage, std::string_view identifier)
- : device{device}, ir{ir}, registry{registry}, stage{stage} {
+ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
+ ShaderType stage_, std::string_view identifier)
+ : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
DefineGlobalMemory();
AddLine("TEMP RC;");
@@ -980,10 +990,9 @@ void ARBDecompiler::DeclareLocalMemory() {
}
void ARBDecompiler::DeclareGlobalMemory() {
- const std::size_t num_entries = ir.GetGlobalMemory().size();
+ const size_t num_entries = ir.GetGlobalMemory().size();
if (num_entries > 0) {
- const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2;
- AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1);
+ AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
}
}
@@ -1125,44 +1134,44 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
VisitAST(current);
}
- } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
- const std::string condition = VisitExpression(ast->condition);
+ } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
+ const std::string condition = VisitExpression(if_then->condition);
ResetTemporaries();
AddLine("MOVC.U RC.x, {};", condition);
AddLine("IF NE.x;");
- for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+ for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) {
VisitAST(current);
}
AddLine("ENDIF;");
- } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
+ } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
AddLine("ELSE;");
- for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+ for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) {
VisitAST(current);
}
- } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
- VisitBlock(ast->nodes);
- } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
- AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition));
+ } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
+ VisitBlock(decoded->nodes);
+ } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
+ AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
ResetTemporaries();
- } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
- const std::string condition = VisitExpression(ast->condition);
+ } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
+ const std::string condition = VisitExpression(do_while->condition);
ResetTemporaries();
AddLine("REP;");
- for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+ for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) {
VisitAST(current);
}
AddLine("MOVC.U RC.x, {};", condition);
AddLine("BRK (NE.x);");
AddLine("ENDREP;");
- } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) {
- const bool is_true = ExprIsTrue(ast->condition);
+ } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) {
+ const bool is_true = ExprIsTrue(ast_return->condition);
if (!is_true) {
- AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
+ AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition));
AddLine("IF NE.x;");
ResetTemporaries();
}
- if (ast->kills) {
+ if (ast_return->kills) {
AddLine("KIL TR;");
} else {
Exit();
@@ -1170,11 +1179,11 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
if (!is_true) {
AddLine("ENDIF;");
}
- } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) {
- if (ExprIsTrue(ast->condition)) {
+ } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) {
+ if (ExprIsTrue(ast_break->condition)) {
AddLine("BRK;");
} else {
- AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
+ AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition));
AddLine("BRK (NE.x);");
ResetTemporaries();
}
@@ -1342,7 +1351,7 @@ std::string ARBDecompiler::Visit(const Node& node) {
GetGenericAttributeIndex(index), swizzle);
}
}
- UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index));
+ UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index);
break;
}
return "{0, 0, 0, 0}.x";
@@ -1363,7 +1372,8 @@ std::string ARBDecompiler::Visit(const Node& node) {
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
std::string temporary = AllocTemporary();
- AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem));
+ AddLine("MOV {}, 0;", temporary);
+ AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
return temporary;
}
@@ -1406,12 +1416,12 @@ std::string ARBDecompiler::Visit(const Node& node) {
return {};
}
-std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
+std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
UNIMPLEMENTED_IF(meta.sampler.is_indexed);
- UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array &&
- meta.sampler.type == Tegra::Shader::TextureType::TextureCube);
+ const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array &&
+ meta.sampler.type == Tegra::Shader::TextureType::TextureCube;
const std::size_t count = operation.GetOperandsCount();
std::string temporary = AllocVectorTemporary();
std::size_t i = 0;
@@ -1419,12 +1429,21 @@ std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operati
AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
}
if (meta.sampler.is_array) {
- AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array));
+ AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array));
+ ++i;
}
if (meta.sampler.is_shadow) {
- AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare));
+ std::string compare = Visit(meta.depth_compare);
+ if (is_extended) {
+ ASSERT(i == 4);
+ std::string extra_coord = AllocVectorTemporary();
+ AddLine("MOV.F {}.x, {};", extra_coord, compare);
+ return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0};
+ }
+ AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare);
+ ++i;
}
- return {std::move(temporary), i};
+ return {temporary, temporary, i};
}
std::string ARBDecompiler::BuildAoffi(Operation operation) {
@@ -1441,18 +1460,21 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) {
}
std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
+ // Read a bindless SSBO, return its address and set CC accordingly
+ // address = c[binding].xy
+ // length = c[binding].z
const u32 binding = global_memory_names.at(gmem.GetDescriptor());
- const char result_swizzle = binding % 2 == 0 ? 'x' : 'y';
const std::string pointer = AllocLongVectorTemporary();
std::string temporary = AllocTemporary();
- const u32 local_index = binding / 2;
- AddLine("PK64.U {}, c[{}];", pointer, local_index);
+ AddLine("PK64.U {}, c[{}];", pointer, binding);
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
Visit(gmem.GetBaseAddress()));
AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
- AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer);
+ AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
+ // Compare offset to length and set CC
+ AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
return fmt::format("{}.x", pointer);
}
@@ -1463,9 +1485,7 @@ void ARBDecompiler::Exit() {
}
const auto safe_get_register = [this](u32 reg) -> std::string {
- // TODO(Rodrigo): Replace with contains once C++20 releases
- const auto& used_registers = ir.GetRegisters();
- if (used_registers.find(reg) != used_registers.end()) {
+ if (ir.GetRegisters().contains(reg)) {
return fmt::format("R{}.x", reg);
}
return "{0, 0, 0, 0}.x";
@@ -1552,7 +1572,9 @@ std::string ARBDecompiler::Assign(Operation operation) {
ResetTemporaries();
return {};
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
+ AddLine("IF NE.x;");
AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
+ AddLine("ENDIF;");
ResetTemporaries();
return {};
} else {
@@ -1844,7 +1866,7 @@ std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
std::string ARBDecompiler::Texture(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [temporary, swizzle] = BuildCoords(operation);
+ const auto [coords, temporary, swizzle] = BuildCoords(operation);
std::string_view opcode = "TEX";
std::string extra;
@@ -1873,7 +1895,7 @@ std::string ARBDecompiler::Texture(Operation operation) {
}
}
- AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id,
+ AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id,
TextureType(meta), BuildAoffi(operation));
AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
return fmt::format("{}.x", temporary);
@@ -1882,7 +1904,7 @@ std::string ARBDecompiler::Texture(Operation operation) {
std::string ARBDecompiler::TextureGather(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [temporary, swizzle] = BuildCoords(operation);
+ const auto [coords, temporary, swizzle] = BuildCoords(operation);
std::string comp;
if (!meta.sampler.is_shadow) {
@@ -1892,7 +1914,7 @@ std::string ARBDecompiler::TextureGather(Operation operation) {
AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
TextureType(meta), BuildAoffi(operation));
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
+ AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element));
return fmt::format("{}.x", temporary);
}
@@ -1930,13 +1952,13 @@ std::string ARBDecompiler::TextureQueryLod(Operation operation) {
std::string ARBDecompiler::TexelFetch(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [temporary, swizzle] = BuildCoords(operation);
+ const auto [coords, temporary, swizzle] = BuildCoords(operation);
if (!meta.sampler.is_buffer) {
ASSERT(swizzle < 4);
AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
}
- AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta),
+ AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta),
BuildAoffi(operation));
AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
return fmt::format("{}.x", temporary);
@@ -1947,7 +1969,7 @@ std::string ARBDecompiler::TextureGradient(Operation operation) {
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
const std::string ddx = AllocVectorTemporary();
const std::string ddy = AllocVectorTemporary();
- const std::string coord = BuildCoords(operation).first;
+ const std::string coord = std::get<1>(BuildCoords(operation));
const std::size_t num_components = meta.derivates.size() / 2;
for (std::size_t index = 0; index < num_components; ++index) {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index b1c4cd62f..5772cad87 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -22,11 +22,11 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
-Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
- : VideoCommon::BufferBlock{cpu_addr, size} {
+Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_)
+ : BufferBlock{cpu_addr_, size_} {
gl_buffer.Create();
- glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
- if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
+ glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW);
+ if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) {
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
}
@@ -34,14 +34,14 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
Buffer::~Buffer() = default;
-void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) {
- glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
- data);
+void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
+ glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset),
+ static_cast<GLsizeiptr>(data_size), data);
}
-void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
+void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
- const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
+ const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size);
const GLintptr gl_offset = static_cast<GLintptr>(offset);
if (read_buffer.handle == 0) {
read_buffer.Create();
@@ -54,17 +54,16 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
}
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
- std::size_t size) {
+ std::size_t copy_size) {
glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
- static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
+ static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size));
}
-OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
- Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
- const Device& device_, std::size_t stream_size)
- : GenericBufferCache{rasterizer, gpu_memory, cpu_memory,
- std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
- device{device_} {
+OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+ const Device& device_, OGLStreamBuffer& stream_buffer_,
+ StateTracker& state_tracker)
+ : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
if (!device.HasFastBufferSubData()) {
return;
}
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index f75b32e31..17ee90316 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -22,18 +22,19 @@ namespace OpenGL {
class Device;
class OGLStreamBuffer;
class RasterizerOpenGL;
+class StateTracker;
class Buffer : public VideoCommon::BufferBlock {
public:
- explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
+ explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_);
~Buffer();
- void Upload(std::size_t offset, std::size_t size, const u8* data);
+ void Upload(std::size_t offset, std::size_t data_size, const u8* data);
- void Download(std::size_t offset, std::size_t size, u8* data);
+ void Download(std::size_t offset, std::size_t data_size, u8* data);
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
- std::size_t size);
+ std::size_t copy_size);
GLuint Handle() const noexcept {
return gl_buffer.handle;
@@ -54,7 +55,8 @@ class OGLBufferCache final : public GenericBufferCache {
public:
explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
- const Device& device, std::size_t stream_size);
+ const Device& device, OGLStreamBuffer& stream_buffer,
+ StateTracker& state_tracker);
~OGLBufferCache();
BufferInfo GetEmptyBuffer(std::size_t) override;
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index e7d95149f..81b71edfb 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -5,9 +5,11 @@
#include <algorithm>
#include <array>
#include <cstddef>
+#include <cstdlib>
#include <cstring>
#include <limits>
#include <optional>
+#include <span>
#include <vector>
#include <glad/glad.h>
@@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1;
constexpr u32 NumStages = 5;
-constexpr std::array LimitUBOs = {
+constexpr std::array LIMIT_UBOS = {
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
- GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS};
-
-constexpr std::array LimitSSBOs = {
+ GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
+};
+constexpr std::array LIMIT_SSBOS = {
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
- GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS};
-
-constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
- GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
- GL_MAX_TEXTURE_IMAGE_UNITS,
- GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS};
-
-constexpr std::array LimitImages = {
+ GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
+};
+constexpr std::array LIMIT_SAMPLERS = {
+ GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
+ GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TEXTURE_IMAGE_UNITS,
+ GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
+};
+constexpr std::array LIMIT_IMAGES = {
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
- GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS};
+ GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
+};
template <typename T>
T GetInteger(GLenum pname) {
@@ -76,8 +80,8 @@ std::vector<std::string_view> GetExtensions() {
return extensions;
}
-bool HasExtension(const std::vector<std::string_view>& images, std::string_view extension) {
- return std::find(images.begin(), images.end(), extension) != images.end();
+bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
+ return std::ranges::find(extensions, extension) != extensions.end();
}
u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
@@ -91,8 +95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
- std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
- [](GLenum pname) { return GetInteger<u32>(pname); });
+ std::ranges::transform(LIMIT_UBOS, max.begin(),
+ [](GLenum pname) { return GetInteger<u32>(pname); });
return max;
}
@@ -115,9 +119,10 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
for (std::size_t i = 0; i < NumStages; ++i) {
const std::size_t stage = stage_swizzle[i];
bindings[stage] = {
- Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]),
- Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]),
- Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])};
+ Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
+ Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
+ Extract(base_samplers, num_samplers, total_samplers / NumStages,
+ LIMIT_SAMPLERS[stage])};
}
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
@@ -130,7 +135,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
// Reserve at least 4 image bindings on the fragment stage.
bindings[4].image =
- Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
+ Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
// This is guaranteed to be at least 1.
const u32 total_extracted_images = num_images / (NumStages - 1);
@@ -142,7 +147,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
continue;
}
bindings[stage].image =
- Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
+ Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
}
// Compute doesn't care about any of this.
@@ -188,17 +193,22 @@ bool IsASTCSupported() {
return true;
}
+[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
+ const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
+ return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
+}
+
} // Anonymous namespace
Device::Device()
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
- const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();
const bool is_nvidia = vendor == "NVIDIA Corporation";
const bool is_amd = vendor == "ATI Technologies Inc.";
+ const bool is_intel = vendor == "Intel";
bool disable_fast_buffer_sub_data = false;
if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
@@ -207,9 +217,8 @@ Device::Device()
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}
-
- uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
- shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
+ uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
+ shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
@@ -223,8 +232,10 @@ Device::Device()
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
+ has_broken_texture_view_formats = is_amd || is_intel;
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
+ has_debugging_tool_attached = IsDebugToolAttached(extensions);
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
@@ -239,6 +250,8 @@ Device::Device()
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
+ LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}",
+ has_broken_texture_view_formats);
if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) {
LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 8a4b6b9fc..3e79d1e37 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -36,11 +36,11 @@ public:
return GetBaseBindings(static_cast<std::size_t>(shader_type));
}
- std::size_t GetUniformBufferAlignment() const {
+ size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
- std::size_t GetShaderStorageBufferAlignment() const {
+ size_t GetShaderStorageBufferAlignment() const {
return shader_storage_alignment;
}
@@ -96,6 +96,10 @@ public:
return has_precise_bug;
}
+ bool HasBrokenTextureViewFormats() const {
+ return has_broken_texture_view_formats;
+ }
+
bool HasFastBufferSubData() const {
return has_fast_buffer_sub_data;
}
@@ -104,6 +108,10 @@ public:
return has_nv_viewport_array2;
}
+ bool HasDebuggingToolAttached() const {
+ return has_debugging_tool_attached;
+ }
+
bool UseAssemblyShaders() const {
return use_assembly_shaders;
}
@@ -118,8 +126,8 @@ private:
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
- std::size_t uniform_buffer_alignment{};
- std::size_t shader_storage_alignment{};
+ size_t uniform_buffer_alignment{};
+ size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
u32 max_compute_shared_memory_size{};
@@ -133,8 +141,10 @@ private:
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
+ bool has_broken_texture_view_formats{};
bool has_fast_buffer_sub_data{};
bool has_nv_viewport_array2{};
+ bool has_debugging_tool_attached{};
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
};
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index b532fdcc2..3e9c922f5 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -11,10 +11,10 @@
namespace OpenGL {
-GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) : FenceBase(payload, is_stubbed) {}
+GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {}
-GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
- : FenceBase(address, payload, is_stubbed) {}
+GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_)
+ : FenceBase{address_, payload_, is_stubbed_} {}
GLInnerFence::~GLInnerFence() = default;
@@ -45,10 +45,10 @@ void GLInnerFence::Wait() {
glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
}
-FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
- TextureCacheOpenGL& texture_cache,
- OGLBufferCache& buffer_cache, QueryCache& query_cache)
- : GenericFenceManager{rasterizer, gpu, texture_cache, buffer_cache, query_cache} {}
+FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::GPU& gpu_, TextureCache& texture_cache_,
+ OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
+ : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(value, is_stubbed);
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index da1dcdace..30dbee613 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -17,8 +17,8 @@ namespace OpenGL {
class GLInnerFence : public VideoCommon::FenceBase {
public:
- GLInnerFence(u32 payload, bool is_stubbed);
- GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed);
+ explicit GLInnerFence(u32 payload_, bool is_stubbed_);
+ explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_);
~GLInnerFence();
void Queue();
@@ -33,13 +33,13 @@ private:
using Fence = std::shared_ptr<GLInnerFence>;
using GenericFenceManager =
- VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
+ VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:
- explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
- TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
- QueryCache& query_cache);
+ explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
+ TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
+ QueryCache& query_cache_);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
deleted file mode 100644
index b8a512cb6..000000000
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <tuple>
-#include <unordered_map>
-#include <utility>
-
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
-
-namespace OpenGL {
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using VideoCore::Surface::SurfaceType;
-
-FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;
-
-FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default;
-
-GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) {
- const auto [entry, is_cache_miss] = cache.try_emplace(key);
- auto& framebuffer{entry->second};
- if (is_cache_miss) {
- framebuffer = CreateFramebuffer(key);
- }
- return framebuffer.handle;
-}
-
-OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) {
- OGLFramebuffer framebuffer;
- framebuffer.Create();
-
- // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
-
- if (key.zeta) {
- const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
- const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
- key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
- }
-
- std::size_t num_buffers = 0;
- std::array<GLenum, Maxwell::NumRenderTargets> targets;
-
- for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
- if (!key.colors[index]) {
- targets[index] = GL_NONE;
- continue;
- }
- const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index);
- key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
-
- const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111;
- targets[index] = GL_COLOR_ATTACHMENT0 + attachment;
- num_buffers = index + 1;
- }
-
- if (num_buffers > 0) {
- glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets));
- } else {
- glDrawBuffer(GL_NONE);
- }
-
- return framebuffer;
-}
-
-std::size_t FramebufferCacheKey::Hash() const noexcept {
- std::size_t hash = std::hash<View>{}(zeta);
- for (const auto& color : colors) {
- hash ^= std::hash<View>{}(color);
- }
- hash ^= static_cast<std::size_t>(color_attachments) << 16;
- return hash;
-}
-
-bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept {
- return std::tie(colors, zeta, color_attachments) ==
- std::tie(rhs.colors, rhs.zeta, rhs.color_attachments);
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
deleted file mode 100644
index 8f698fee0..000000000
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include <unordered_map>
-
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_texture_cache.h"
-
-namespace OpenGL {
-
-constexpr std::size_t BitsPerAttachment = 4;
-
-struct FramebufferCacheKey {
- View zeta;
- std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
- u32 color_attachments = 0;
-
- std::size_t Hash() const noexcept;
-
- bool operator==(const FramebufferCacheKey& rhs) const noexcept;
-
- bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
- return !operator==(rhs);
- }
-
- void SetAttachment(std::size_t index, u32 attachment) {
- color_attachments |= attachment << (BitsPerAttachment * index);
- }
-};
-
-} // namespace OpenGL
-
-namespace std {
-
-template <>
-struct hash<OpenGL::FramebufferCacheKey> {
- std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept {
- return k.Hash();
- }
-};
-
-} // namespace std
-
-namespace OpenGL {
-
-class FramebufferCacheOpenGL {
-public:
- FramebufferCacheOpenGL();
- ~FramebufferCacheOpenGL();
-
- GLuint GetFramebuffer(const FramebufferCacheKey& key);
-
-private:
- OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
-
- std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
-};
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 1a3d9720e..acebbf5f4 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -30,11 +30,9 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
} // Anonymous namespace
-QueryCache::QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
- Tegra::MemoryManager& gpu_memory)
- : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter>(
- rasterizer, maxwell3d, gpu_memory),
- gl_rasterizer{rasterizer} {}
+QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::MemoryManager& gpu_memory_)
+ : QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {}
QueryCache::~QueryCache() = default;
@@ -59,10 +57,11 @@ bool QueryCache::AnyCommandQueued() const noexcept {
return gl_rasterizer.AnyCommandQueued();
}
-HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
- VideoCore::QueryType type)
- : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
- type{type}, query{cache.AllocateQuery(type)} {
+HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
+ VideoCore::QueryType type_)
+ : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, query{
+ cache.AllocateQuery(
+ type)} {
glBeginQuery(GetTarget(type), query.handle);
}
@@ -86,13 +85,14 @@ u64 HostCounter::BlockingQuery() const {
return static_cast<u64>(value);
}
-CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
- : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
+CachedQuery::CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
+ u8* host_ptr_)
+ : CachedQueryBase{cpu_addr_, host_ptr_}, cache{&cache_}, type{type_} {}
CachedQuery::~CachedQuery() = default;
CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
- : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
+ : CachedQueryBase(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
cache = rhs.cache;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 82cac51ee..7bbe5cfe9 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -29,8 +29,8 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
- explicit QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
- Tegra::MemoryManager& gpu_memory);
+ explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::MemoryManager& gpu_memory_);
~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type);
@@ -46,8 +46,8 @@ private:
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
public:
- explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
- VideoCore::QueryType type);
+ explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
+ VideoCore::QueryType type_);
~HostCounter();
void EndQuery();
@@ -62,8 +62,8 @@ private:
class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
public:
- explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
- u8* host_ptr);
+ explicit CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
+ u8* host_ptr_);
~CachedQuery() override;
CachedQuery(CachedQuery&& rhs) noexcept;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index bbb2eb17c..8aa63d329 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -25,12 +25,15 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/shader_cache.h"
+#include "video_core/texture_cache/texture_cache.h"
namespace OpenGL {
@@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
namespace {
-constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
-constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
+constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
+constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
-constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
+constexpr size_t TOTAL_CONST_BUFFER_BYTES =
NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
-constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
-constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
+constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
+constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
+
+constexpr size_t MAX_TEXTURES = 192;
+constexpr size_t MAX_IMAGES = 48;
+
+struct TextureHandle {
+ constexpr TextureHandle(u32 data, bool via_header_index) {
+ const Tegra::Texture::TextureHandle handle{data};
+ image = handle.tic_id;
+ sampler = via_header_index ? image : handle.tsc_id.Value();
+ }
+
+ u32 image;
+ u32 sampler;
+};
template <typename Engine, typename Entry>
-Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
- ShaderType shader_type, std::size_t index = 0) {
+TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
+ ShaderType shader_type, size_t index = 0) {
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
if (entry.is_separated) {
const u32 buffer_1 = entry.buffer;
@@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
const u32 offset_2 = entry.secondary_offset;
const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
- return engine.GetTextureInfo(handle_1 | handle_2);
+ return TextureHandle(handle_1 | handle_2, via_header_index);
}
}
if (entry.is_bindless) {
- const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
- return engine.GetTextureInfo(handle);
- }
-
- const auto& gpu_profile = engine.AccessGuestDriverProfile();
- const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
- if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
- return engine.GetStageTexture(shader_type, offset);
- } else {
- return engine.GetTexture(offset);
+ const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
+ return TextureHandle(raw, via_header_index);
}
+ const u32 buffer = engine.GetBoundBuffer();
+ const u64 offset = (entry.offset + index) * sizeof(u32);
+ return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
}
std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
@@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
if (!entry.IsIndirect()) {
return entry.GetSize();
}
-
if (buffer.size > Maxwell::MaxConstBufferSize) {
LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
Maxwell::MaxConstBufferSize);
@@ -131,7 +142,7 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
case 43:
return {GL_BACK_SECONDARY_COLOR_NV, 0};
}
- UNIMPLEMENTED_MSG("index={}", static_cast<int>(index));
+ UNIMPLEMENTED_MSG("index={}", index);
return {GL_POSITION, 0};
}
@@ -139,35 +150,68 @@ void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
-void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) {
- if (num_entries == 0) {
+void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
+ if (num_ssbos == 0) {
return;
}
- if (num_entries % 2 == 1) {
- pointers[num_entries] = 0;
+ glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
+ reinterpret_cast<const GLuint*>(ssbos));
+}
+
+ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
+ if (entry.is_buffer) {
+ return ImageViewType::Buffer;
+ }
+ switch (entry.type) {
+ case Tegra::Shader::TextureType::Texture1D:
+ return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
+ case Tegra::Shader::TextureType::Texture2D:
+ return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
+ case Tegra::Shader::TextureType::Texture3D:
+ return ImageViewType::e3D;
+ case Tegra::Shader::TextureType::TextureCube:
+ return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
+ }
+ UNREACHABLE();
+ return ImageViewType::e2D;
+}
+
+ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
+ switch (entry.type) {
+ case Tegra::Shader::ImageType::Texture1D:
+ return ImageViewType::e1D;
+ case Tegra::Shader::ImageType::Texture1DArray:
+ return ImageViewType::e1DArray;
+ case Tegra::Shader::ImageType::Texture2D:
+ return ImageViewType::e2D;
+ case Tegra::Shader::ImageType::Texture2DArray:
+ return ImageViewType::e2DArray;
+ case Tegra::Shader::ImageType::Texture3D:
+ return ImageViewType::e3D;
+ case Tegra::Shader::ImageType::TextureBuffer:
+ return ImageViewType::Buffer;
}
- const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2);
- glProgramLocalParametersI4uivNV(target, 0, num_vectors,
- reinterpret_cast<const GLuint*>(pointers));
+ UNREACHABLE();
+ return ImageViewType::e2D;
}
} // Anonymous namespace
-RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_,
- Core::Memory::Memory& cpu_memory, const Device& device_,
+RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
+ Core::Memory::Memory& cpu_memory_, const Device& device_,
ScreenInfo& screen_info_, ProgramManager& program_manager_,
StateTracker& state_tracker_)
- : RasterizerAccelerated{cpu_memory}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
+ : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
- texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker),
- shader_cache(*this, emu_window, gpu, maxwell3d, kepler_compute, gpu_memory, device),
+ stream_buffer(device, state_tracker),
+ texture_cache_runtime(device, program_manager, state_tracker),
+ texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
+ shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
query_cache(*this, maxwell3d, gpu_memory),
- buffer_cache(*this, gpu_memory, cpu_memory, device, STREAM_BUFFER_SIZE),
+ buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
- async_shaders(emu_window) {
- CheckExtensions();
-
+ async_shaders(emu_window_) {
unified_uniform_buffer.Create();
glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
@@ -178,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra:
nullptr, 0);
}
}
-
if (device.UseAsynchronousShaders()) {
async_shaders.AllocateWorkers();
}
@@ -190,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() {
}
}
-void RasterizerOpenGL::CheckExtensions() {
- if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
- LOG_WARNING(
- Render_OpenGL,
- "Anisotropic filter is not supported! This can cause graphical issues in some games.");
- }
-}
-
void RasterizerOpenGL::SetupVertexFormat() {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexFormats]) {
@@ -320,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
return info.offset;
}
-void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
+void RasterizerOpenGL::SetupShaders() {
MICROPROFILE_SCOPE(OpenGL_Shader);
u32 clip_distances = 0;
+ std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
+ image_view_indices.clear();
+ sampler_handles.clear();
+
+ texture_cache.SynchronizeGraphicsDescriptors();
+
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = maxwell3d.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -342,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
continue;
}
-
// Currently this stages are not supported in the OpenGL backend.
// TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
if (program == Maxwell::ShaderProgram::TesselationControl ||
@@ -351,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
-
const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
switch (program) {
case Maxwell::ShaderProgram::VertexA:
@@ -367,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
default:
UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
shader_config.enable.Value(), shader_config.offset);
+ break;
}
// Stage indices are 0 - 5
- const std::size_t stage = index == 0 ? 0 : index - 1;
+ const size_t stage = index == 0 ? 0 : index - 1;
+ shaders[stage] = shader;
+
SetupDrawConstBuffers(stage, shader);
SetupDrawGlobalMemory(stage, shader);
- SetupDrawTextures(stage, shader);
- SetupDrawImages(stage, shader);
+ SetupDrawTextures(shader, stage);
+ SetupDrawImages(shader, stage);
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
@@ -388,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
++index;
}
}
-
SyncClipEnabled(clip_distances);
maxwell3d.dirty.flags[Dirty::Shaders] = false;
+
+ const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+
+ size_t image_view_index = 0;
+ size_t texture_index = 0;
+ size_t image_index = 0;
+ for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
+ const Shader* const shader = shaders[stage];
+ if (shader) {
+ const auto base = device.GetBaseBindings(stage);
+ BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
+ texture_index, image_index);
+ }
+ }
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -421,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
shader_cache.LoadDiskCache(title_id, stop_loading, callback);
}
-void RasterizerOpenGL::ConfigureFramebuffers() {
- MICROPROFILE_SCOPE(OpenGL_Framebuffer);
- if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
- return;
- }
- maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
-
- texture_cache.GuardRenderTargets(true);
-
- View depth_surface = texture_cache.GetDepthBufferSurface(true);
-
- const auto& regs = maxwell3d.regs;
- UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
-
- // Bind the framebuffer surfaces
- FramebufferCacheKey key;
- const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
- for (std::size_t index = 0; index < colors_count; ++index) {
- View color_surface{texture_cache.GetColorBufferSurface(index, true)};
- if (!color_surface) {
- continue;
- }
- // Assume that a surface will be written to if it is used as a framebuffer, even
- // if the shader doesn't actually write to it.
- texture_cache.MarkColorBufferInUse(index);
-
- key.SetAttachment(index, regs.rt_control.GetMap(index));
- key.colors[index] = std::move(color_surface);
- }
-
- if (depth_surface) {
- // Assume that a surface will be written to if it is used as a framebuffer, even if
- // the shader doesn't actually write to it.
- texture_cache.MarkDepthBufferInUse();
- key.zeta = std::move(depth_surface);
- }
-
- texture_cache.GuardRenderTargets(false);
-
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
-}
-
-void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
- const auto& regs = maxwell3d.regs;
-
- texture_cache.GuardRenderTargets(true);
- View color_surface;
-
- if (using_color) {
- // Determine if we have to preserve the contents.
- // First we have to make sure all clear masks are enabled.
- bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G ||
- !regs.clear_buffers.B || !regs.clear_buffers.A;
- const std::size_t index = regs.clear_buffers.RT;
- if (regs.clear_flags.scissor) {
- // Then we have to confirm scissor testing clears the whole image.
- const auto& scissor = regs.scissor_test[0];
- preserve_contents |= scissor.min_x > 0;
- preserve_contents |= scissor.min_y > 0;
- preserve_contents |= scissor.max_x < regs.rt[index].width;
- preserve_contents |= scissor.max_y < regs.rt[index].height;
- }
-
- color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents);
- texture_cache.MarkColorBufferInUse(index);
- }
-
- View depth_surface;
- if (using_depth_stencil) {
- bool preserve_contents = false;
- if (regs.clear_flags.scissor) {
- // For depth stencil clears we only have to confirm scissor test covers the whole image.
- const auto& scissor = regs.scissor_test[0];
- preserve_contents |= scissor.min_x > 0;
- preserve_contents |= scissor.min_y > 0;
- preserve_contents |= scissor.max_x < regs.zeta_width;
- preserve_contents |= scissor.max_y < regs.zeta_height;
- }
-
- depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
- texture_cache.MarkDepthBufferInUse();
- }
- texture_cache.GuardRenderTargets(false);
-
- FramebufferCacheKey key;
- key.colors[0] = std::move(color_surface);
- key.zeta = std::move(depth_surface);
-
- state_tracker.NotifyFramebuffer();
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
-}
-
void RasterizerOpenGL::Clear() {
if (!maxwell3d.ShouldExecute()) {
return;
@@ -527,8 +491,9 @@ void RasterizerOpenGL::Clear() {
regs.clear_buffers.A) {
use_color = true;
- state_tracker.NotifyColorMask0();
- glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
+ const GLuint index = regs.clear_buffers.RT;
+ state_tracker.NotifyColorMask(index);
+ glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
// TODO(Rodrigo): Determine if clamping is used on clears
@@ -561,15 +526,17 @@ void RasterizerOpenGL::Clear() {
state_tracker.NotifyScissor0();
glDisablei(GL_SCISSOR_TEST, 0);
}
-
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
- ConfigureClearFramebuffer(use_color, use_depth || use_stencil);
+ {
+ auto lock = texture_cache.AcquireLock();
+ texture_cache.UpdateRenderTargets(true);
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
+ }
if (use_color) {
- glClearBufferfv(GL_COLOR, 0, regs.clear_color);
+ glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
}
-
if (use_depth && use_stencil) {
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
} else if (use_depth) {
@@ -626,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
// Prepare the vertex array.
- const bool invalidated = buffer_cache.Map(buffer_size);
-
- if (invalidated) {
- // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
- auto& dirty = maxwell3d.dirty.flags;
- dirty[Dirty::VertexBuffers] = true;
- for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
- dirty[index] = true;
- }
- }
+ buffer_cache.Map(buffer_size);
// Prepare vertex array format.
SetupVertexFormat();
@@ -659,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
// Setup shaders and their used resources.
- texture_cache.GuardSamplers(true);
- const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
- SetupShaders(primitive_mode);
- texture_cache.GuardSamplers(false);
-
- ConfigureFramebuffers();
+ auto lock = texture_cache.AcquireLock();
+ SetupShaders();
// Signal the buffer cache that we are not going to upload more things.
buffer_cache.Unmap();
-
+ texture_cache.UpdateRenderTargets(false);
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
program_manager.BindGraphicsPipeline();
- if (texture_cache.TextureBarrier()) {
- glTextureBarrier();
- }
-
+ const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
BeginTransformFeedback(primitive_mode);
const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
@@ -726,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
buffer_cache.Acquire();
current_cbuf = 0;
- auto kernel = shader_cache.GetComputeKernel(code_addr);
- program_manager.BindCompute(kernel->GetHandle());
+ Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
- SetupComputeTextures(kernel);
- SetupComputeImages(kernel);
+ auto lock = texture_cache.AcquireLock();
+ BindComputeTextures(kernel);
- const std::size_t buffer_size =
- Tegra::Engines::KeplerCompute::NumConstBuffers *
- (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
+ const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
+ (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
buffer_cache.Map(buffer_size);
SetupComputeConstBuffers(kernel);
@@ -743,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
buffer_cache.Unmap();
const auto& launch_desc = kepler_compute.launch_description;
- program_manager.BindCompute(kernel->GetHandle());
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
}
@@ -764,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
- texture_cache.FlushRegion(addr, size);
+ {
+ auto lock = texture_cache.AcquireLock();
+ texture_cache.DownloadMemory(addr, size);
+ }
buffer_cache.FlushRegion(addr, size);
query_cache.FlushRegion(addr, size);
}
@@ -773,7 +725,8 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
if (!Settings::IsGPULevelHigh()) {
return buffer_cache.MustFlushRegion(addr, size);
}
- return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
+ return texture_cache.IsRegionGpuModified(addr, size) ||
+ buffer_cache.MustFlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -781,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
- texture_cache.InvalidateRegion(addr, size);
+ {
+ auto lock = texture_cache.AcquireLock();
+ texture_cache.WriteMemory(addr, size);
+ }
shader_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
@@ -792,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
- texture_cache.OnCPUWrite(addr, size);
+ {
+ auto lock = texture_cache.AcquireLock();
+ texture_cache.WriteMemory(addr, size);
+ }
shader_cache.OnCPUWrite(addr, size);
buffer_cache.OnCPUWrite(addr, size);
}
void RasterizerOpenGL::SyncGuestHost() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- texture_cache.SyncGuestHost();
buffer_cache.SyncGuestHost();
shader_cache.SyncGuestHost();
}
+void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
+ {
+ auto lock = texture_cache.AcquireLock();
+ texture_cache.UnmapMemory(addr, size);
+ }
+ buffer_cache.OnCPUWrite(addr, size);
+ shader_cache.OnCPUWrite(addr, size);
+}
+
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
if (!gpu.IsAsync()) {
gpu_memory.Write<u32>(addr, value);
@@ -845,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() {
GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
}
+void RasterizerOpenGL::FragmentBarrier() {
+ glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
+}
+
+void RasterizerOpenGL::TiledCacheBarrier() {
+ glTextureBarrier();
+}
+
void RasterizerOpenGL::FlushCommands() {
// Only flush when we have commands queued to OpenGL.
if (num_queued_commands == 0) {
@@ -858,53 +833,103 @@ void RasterizerOpenGL::TickFrame() {
// Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
num_queued_commands = 0;
+ fence_manager.TickFrame();
buffer_cache.TickFrame();
+ {
+ auto lock = texture_cache.AcquireLock();
+ texture_cache.TickFrame();
+ }
}
-bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
- const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
- texture_cache.DoFermiCopy(src, dst, copy_config);
+ auto lock = texture_cache.AcquireLock();
+ texture_cache.BlitImage(dst, src, copy_config);
return true;
}
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
- if (!framebuffer_addr) {
- return {};
+ if (framebuffer_addr == 0) {
+ return false;
}
-
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
- if (!surface) {
- return {};
+ auto lock = texture_cache.AcquireLock();
+ ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
+ if (!image_view) {
+ return false;
}
-
// Verify that the cached surface is the same size and format as the requested framebuffer
- const auto& params{surface->GetSurfaceParams()};
- const auto& pixel_format{
- VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
- ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
- ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
+ // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
+ // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
- if (params.pixel_format != pixel_format) {
- LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different");
- }
+ screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
+ screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
+ return true;
+}
- screen_info.display_texture = surface->GetTexture();
- screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion;
+void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
+ image_view_indices.clear();
+ sampler_handles.clear();
- return true;
+ texture_cache.SynchronizeComputeDescriptors();
+
+ SetupComputeTextures(kernel);
+ SetupComputeImages(kernel);
+
+ const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ texture_cache.FillComputeImageViews(indices_span, image_view_ids);
+
+ program_manager.BindCompute(kernel->GetHandle());
+ size_t image_view_index = 0;
+ size_t texture_index = 0;
+ size_t image_index = 0;
+ BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
+}
+
+void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
+ GLuint base_image, size_t& image_view_index,
+ size_t& texture_index, size_t& image_index) {
+ const GLuint* const samplers = sampler_handles.data() + texture_index;
+ const GLuint* const textures = texture_handles.data() + texture_index;
+ const GLuint* const images = image_handles.data() + image_index;
+
+ const size_t num_samplers = entries.samplers.size();
+ for (const auto& sampler : entries.samplers) {
+ for (size_t i = 0; i < sampler.size; ++i) {
+ const ImageViewId image_view_id = image_view_ids[image_view_index++];
+ const ImageView& image_view = texture_cache.GetImageView(image_view_id);
+ const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
+ texture_handles[texture_index++] = handle;
+ }
+ }
+ const size_t num_images = entries.images.size();
+ for (size_t unit = 0; unit < num_images; ++unit) {
+ // TODO: Mark as modified
+ const ImageViewId image_view_id = image_view_ids[image_view_index++];
+ const ImageView& image_view = texture_cache.GetImageView(image_view_id);
+ const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
+ image_handles[image_index] = handle;
+ ++image_index;
+ }
+ if (num_samplers > 0) {
+ glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
+ glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
+ }
+ if (num_images > 0) {
+ glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
+ }
}
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
- static constexpr std::array PARAMETER_LUT = {
- GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
+ static constexpr std::array PARAMETER_LUT{
+ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
- GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
-
+ GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
+ };
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& stages = maxwell3d.state.shader_stages;
const auto& shader_stage = stages[stage_index];
@@ -1003,12 +1028,11 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
-
const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
const auto& entries{shader->GetEntries().global_memory_entries};
- std::array<GLuint64EXT, 32> pointers;
- ASSERT(entries.size() < pointers.size());
+ std::array<BindlessSSBO, 32> ssbos;
+ ASSERT(entries.size() < ssbos.size());
const bool assembly_shaders = device.UseAssemblyShaders();
u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
@@ -1016,11 +1040,11 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
const u32 size{gpu_memory.Read<u32>(addr + 8)};
- SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
+ SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
++binding;
}
if (assembly_shaders) {
- UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size());
+ UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
}
}
@@ -1028,106 +1052,85 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
const auto& entries{kernel->GetEntries().global_memory_entries};
- std::array<GLuint64EXT, 32> pointers;
- ASSERT(entries.size() < pointers.size());
+ std::array<BindlessSSBO, 32> ssbos;
+ ASSERT(entries.size() < ssbos.size());
u32 binding = 0;
for (const auto& entry : entries) {
const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
const u32 size{gpu_memory.Read<u32>(addr + 8)};
- SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
+ SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
++binding;
}
if (device.UseAssemblyShaders()) {
- UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size());
+ UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
}
}
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
- GPUVAddr gpu_addr, std::size_t size,
- GLuint64EXT* pointer) {
- const std::size_t alignment{device.GetShaderStorageBufferAlignment()};
+ GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
+ const size_t alignment{device.GetShaderStorageBufferAlignment()};
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
if (device.UseAssemblyShaders()) {
- *pointer = info.address + info.offset;
+ *ssbo = BindlessSSBO{
+ .address = static_cast<GLuint64EXT>(info.address + info.offset),
+ .length = static_cast<GLsizei>(size),
+ .padding = 0,
+ };
} else {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
static_cast<GLsizeiptr>(size));
}
}
-void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
- MICROPROFILE_SCOPE(OpenGL_Texture);
- u32 binding = device.GetBaseBindings(stage_index).sampler;
+void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
+ const bool via_header_index =
+ maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : shader->GetEntries().samplers) {
const auto shader_type = static_cast<ShaderType>(stage_index);
- for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
- SetupTexture(binding++, texture, entry);
+ for (size_t index = 0; index < entry.size; ++index) {
+ const auto handle =
+ GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
+ const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
+ sampler_handles.push_back(sampler->Handle());
+ image_view_indices.push_back(handle.image);
}
}
}
-void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
- MICROPROFILE_SCOPE(OpenGL_Texture);
- u32 binding = 0;
+void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
+ const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : kernel->GetEntries().samplers) {
- for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i);
- SetupTexture(binding++, texture, entry);
+ for (size_t i = 0; i < entry.size; ++i) {
+ const auto handle =
+ GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
+ const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
+ sampler_handles.push_back(sampler->Handle());
+ image_view_indices.push_back(handle.image);
}
}
}
-void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
- const SamplerEntry& entry) {
- const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
- if (!view) {
- // Can occur when texture addr is null or its memory is unmapped/invalid
- glBindSampler(binding, 0);
- glBindTextureUnit(binding, 0);
- return;
- }
- const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
- texture.tic.z_source, texture.tic.w_source);
- glBindTextureUnit(binding, handle);
- if (!view->GetSurfaceParams().IsBuffer()) {
- glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
- }
-}
-
-void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
- u32 binding = device.GetBaseBindings(stage_index).image;
+void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
+ const bool via_header_index =
+ maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : shader->GetEntries().images) {
const auto shader_type = static_cast<ShaderType>(stage_index);
- const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
- SetupImage(binding++, tic, entry);
+ const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
+ image_view_indices.push_back(handle.image);
}
}
-void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
- u32 binding = 0;
+void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
+ const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : shader->GetEntries().images) {
- const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic;
- SetupImage(binding++, tic, entry);
+ const auto handle =
+ GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
+ image_view_indices.push_back(handle.image);
}
}
-void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
- const ImageEntry& entry) {
- const auto view = texture_cache.GetImageSurface(tic, entry);
- if (!view) {
- glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
- return;
- }
- if (entry.is_written) {
- view->MarkAsModified(texture_cache.Tick());
- }
- const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
- glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
-}
-
void RasterizerOpenGL::SyncViewport() {
auto& flags = maxwell3d.dirty.flags;
const auto& regs = maxwell3d.regs;
@@ -1157,7 +1160,7 @@ void RasterizerOpenGL::SyncViewport() {
flags[Dirty::ClipControl] = false;
bool flip_y = false;
- if (regs.viewport_transform[0].scale_y < 0.0) {
+ if (regs.viewport_transform[0].scale_y < 0.0f) {
flip_y = !flip_y;
}
if (regs.screen_y_control.y_negate != 0) {
@@ -1527,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() {
flags[Dirty::PointSize] = false;
oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
+ oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable);
- if (maxwell3d.regs.vp_point_size.enable) {
- // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
- glEnable(GL_PROGRAM_POINT_SIZE);
- return;
- }
-
- // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
- // in OpenGL).
glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
- glDisable(GL_PROGRAM_POINT_SIZE);
}
void RasterizerOpenGL::SyncLineState() {
@@ -1580,10 +1575,6 @@ void RasterizerOpenGL::SyncAlphaTest() {
flags[Dirty::AlphaTest] = false;
const auto& regs = maxwell3d.regs;
- if (regs.alpha_test_enabled && regs.rt_control.count > 1) {
- LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested");
- }
-
if (regs.alpha_test_enabled) {
glEnable(GL_ALPHA_TEST);
glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index f451404b2..82e03e677 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -7,12 +7,13 @@
#include <array>
#include <atomic>
#include <cstddef>
-#include <map>
#include <memory>
#include <optional>
#include <tuple>
#include <utility>
+#include <boost/container/static_vector.hpp>
+
#include <glad/glad.h>
#include "common/common_types.h"
@@ -23,16 +24,14 @@
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
-#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_sampler_cache.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
+#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
@@ -51,14 +50,21 @@ class MemoryManager;
namespace OpenGL {
struct ScreenInfo;
-struct DrawParameters;
+struct ShaderEntries;
+
+struct BindlessSSBO {
+ GLuint64EXT address;
+ GLsizei length;
+ GLsizei padding;
+};
+static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
- explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
- Core::Memory::Memory& cpu_memory, const Device& device,
- ScreenInfo& screen_info, ProgramManager& program_manager,
- StateTracker& state_tracker);
+ explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
+ Core::Memory::Memory& cpu_memory_, const Device& device_,
+ ScreenInfo& screen_info_, ProgramManager& program_manager_,
+ StateTracker& state_tracker_);
~RasterizerOpenGL() override;
void Draw(bool is_indexed, bool is_instanced) override;
@@ -72,15 +78,18 @@ public:
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
+ void UnmapMemory(VAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
+ void FragmentBarrier() override;
+ void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
- bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
- const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+ bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
@@ -101,11 +110,14 @@ public:
}
private:
- /// Configures the color and depth framebuffer states.
- void ConfigureFramebuffers();
+ static constexpr size_t MAX_TEXTURES = 192;
+ static constexpr size_t MAX_IMAGES = 48;
+ static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
+
+ void BindComputeTextures(Shader* kernel);
- /// Configures the color and depth framebuffer for clearing.
- void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
+ void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
+ size_t& image_view_index, size_t& texture_index, size_t& image_index);
/// Configures the current constbuffers to use for the draw command.
void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
@@ -126,26 +138,19 @@ private:
/// Configures a global memory buffer.
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
- std::size_t size, GLuint64EXT* pointer);
+ size_t size, BindlessSSBO* ssbo);
/// Configures the current textures to use for the draw command.
- void SetupDrawTextures(std::size_t stage_index, Shader* shader);
+ void SetupDrawTextures(const Shader* shader, size_t stage_index);
/// Configures the textures used in a compute shader.
- void SetupComputeTextures(Shader* kernel);
-
- /// Configures a texture.
- void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
- const SamplerEntry& entry);
+ void SetupComputeTextures(const Shader* kernel);
/// Configures images in a graphics shader.
- void SetupDrawImages(std::size_t stage_index, Shader* shader);
+ void SetupDrawImages(const Shader* shader, size_t stage_index);
/// Configures images in a compute shader.
- void SetupComputeImages(Shader* shader);
-
- /// Configures an image.
- void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
+ void SetupComputeImages(const Shader* shader);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport();
@@ -220,9 +225,6 @@ private:
/// End a transform feedback
void EndTransformFeedback();
- /// Check for extension that are not strictly required but are needed for correct emulation
- void CheckExtensions();
-
std::size_t CalculateVertexArraysSize() const;
std::size_t CalculateIndexBufferSize() const;
@@ -235,7 +237,7 @@ private:
GLintptr SetupIndexBuffer();
- void SetupShaders(GLenum primitive_mode);
+ void SetupShaders();
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
@@ -247,19 +249,21 @@ private:
ProgramManager& program_manager;
StateTracker& state_tracker;
- TextureCacheOpenGL texture_cache;
+ OGLStreamBuffer stream_buffer;
+ TextureCacheRuntime texture_cache_runtime;
+ TextureCache texture_cache;
ShaderCacheOpenGL shader_cache;
- SamplerCacheOpenGL sampler_cache;
- FramebufferCacheOpenGL framebuffer_cache;
QueryCache query_cache;
OGLBufferCache buffer_cache;
FenceManagerOpenGL fence_manager;
VideoCommon::Shader::AsyncShaders async_shaders;
- static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
-
- GLint vertex_binding = 0;
+ boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
+ std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
+ boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
+ std::array<GLuint, MAX_TEXTURES> texture_handles;
+ std::array<GLuint, MAX_IMAGES> image_handles;
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
transform_feedback_buffers;
@@ -273,7 +277,7 @@ private:
std::size_t current_cbuf = 0;
OGLBuffer unified_uniform_buffer;
- /// Number of commands queued to the OpenGL driver. Reseted on flush.
+ /// Number of commands queued to the OpenGL driver. Resetted on flush.
std::size_t num_queued_commands = 0;
u32 last_clip_distance_mask = 0;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 0ebcec427..0e34a0f20 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -71,7 +71,7 @@ void OGLSampler::Create() {
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
- glGenSamplers(1, &handle);
+ glCreateSamplers(1, &handle);
}
void OGLSampler::Release() {
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
deleted file mode 100644
index 5c174879a..000000000
--- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/logging/log.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_sampler_cache.h"
-#include "video_core/renderer_opengl/maxwell_to_gl.h"
-
-namespace OpenGL {
-
-SamplerCacheOpenGL::SamplerCacheOpenGL() = default;
-
-SamplerCacheOpenGL::~SamplerCacheOpenGL() = default;
-
-OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
- OGLSampler sampler;
- sampler.Create();
-
- const GLuint sampler_id{sampler.handle};
- glSamplerParameteri(
- sampler_id, GL_TEXTURE_MAG_FILTER,
- MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None));
- glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
- MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter));
- glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u));
- glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v));
- glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p));
- glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
- tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
- glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
- MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func));
- glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data());
- glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod());
- glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod());
- glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias());
- if (GLAD_GL_ARB_texture_filter_anisotropic) {
- glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
- } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
- glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
- } else {
- LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
- }
-
- return sampler;
-}
-
-GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const {
- return sampler.handle;
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h
deleted file mode 100644
index 34ee37f00..000000000
--- a/src/video_core/renderer_opengl/gl_sampler_cache.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <glad/glad.h>
-
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/sampler_cache.h"
-
-namespace OpenGL {
-
-class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> {
-public:
- explicit SamplerCacheOpenGL();
- ~SamplerCacheOpenGL();
-
-protected:
- OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
-
- GLuint ToSamplerType(const OGLSampler& sampler) const override;
-};
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index bd56bed0c..d4841fdb7 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -27,7 +27,6 @@
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
-#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
@@ -198,10 +197,10 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
return program;
}
-Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
- ProgramSharedPtr program_, bool is_built)
+Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_,
+ ProgramSharedPtr program_, bool is_built_)
: registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
- is_built(is_built) {
+ is_built{is_built_} {
handle = program->assembly_program.handle;
if (handle == 0) {
handle = program->source_program.handle;
@@ -318,14 +317,13 @@ std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
}
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer,
+ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_)
- : VideoCommon::ShaderCache<Shader>{rasterizer}, emu_window{emu_window_}, gpu{gpu_},
- gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_},
- kepler_compute{kepler_compute_}, device{device_} {}
+ : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_},
+ maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {}
ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
@@ -460,7 +458,7 @@ void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop
ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats) {
- if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
+ if (!supported_formats.contains(precompiled_entry.binary_format)) {
LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
return {};
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 1708af06a..2aed0697e 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -108,7 +108,7 @@ public:
private:
explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
- ProgramSharedPtr program, bool is_built = true);
+ ProgramSharedPtr program, bool is_built_ = true);
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
@@ -119,10 +119,11 @@ private:
class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
public:
- explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::Frontend::EmuWindow& emu_window,
- Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d,
- Tegra::Engines::KeplerCompute& kepler_compute,
- Tegra::MemoryManager& gpu_memory, const Device& device);
+ explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
+ Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const Device& device_);
~ShaderCacheOpenGL() override;
/// Loads disk cache for the current game
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index bbb8fb095..2e1fa252d 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -38,11 +38,9 @@ using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
using Tegra::Shader::TextureType;
-using VideoCommon::Shader::BuildTransformFeedback;
-using VideoCommon::Shader::Registry;
-using namespace std::string_literals;
using namespace VideoCommon::Shader;
+using namespace std::string_literals;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using Operation = const OperationNode&;
@@ -131,7 +129,7 @@ private:
class Expression final {
public:
- Expression(std::string code, Type type) : code{std::move(code)}, type{type} {
+ Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} {
ASSERT(type != Type::Void);
}
Expression() : type{Type::Void} {}
@@ -148,8 +146,8 @@ public:
ASSERT(type == Type::Void);
}
- std::string As(Type type) const {
- switch (type) {
+ std::string As(Type type_) const {
+ switch (type_) {
case Type::Bool:
return AsBool();
case Type::Bool2:
@@ -316,7 +314,7 @@ std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology t
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
return {"triangles_adjacency", 6};
default:
- UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
+ UNIMPLEMENTED_MSG("topology={}", topology);
return {"points", 1};
}
}
@@ -342,7 +340,7 @@ std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
case Tegra::Shader::OutputTopology::TriangleStrip:
return "triangle_strip";
default:
- UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
+ UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
return "points";
}
}
@@ -418,11 +416,12 @@ struct GenericVaryingDescription {
class GLSLDecompiler final {
public:
- explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
- ShaderType stage, std::string_view identifier, std::string_view suffix)
- : device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier},
- suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{
- UseUnifiedUniforms(device, ir, stage)} {
+ explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
+ ShaderType stage_, std::string_view identifier_,
+ std::string_view suffix_)
+ : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_},
+ suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{
+ UseUnifiedUniforms(device_, ir_, stage_)} {
if (stage != ShaderType::Compute) {
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
}
@@ -744,7 +743,7 @@ private:
case PixelImap::Unused:
break;
}
- UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
+ UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
return {};
}
@@ -777,16 +776,16 @@ private:
name = "gs_" + name + "[]";
}
- std::string suffix;
+ std::string suffix_;
if (stage == ShaderType::Fragment) {
const auto input_mode{header.ps.GetPixelImap(location)};
if (input_mode == PixelImap::Unused) {
return;
}
- suffix = GetInputFlags(input_mode);
+ suffix_ = GetInputFlags(input_mode);
}
- code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name);
+ code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name);
}
void DeclareOutputAttributes() {
@@ -877,7 +876,7 @@ private:
}
u32 binding = device.GetBaseBindings(stage).uniform_buffer;
- for (const auto [index, info] : ir.GetConstantBuffers()) {
+ for (const auto& [index, info] : ir.GetConstantBuffers()) {
const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4;
const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
@@ -1251,7 +1250,7 @@ private:
}
break;
}
- UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
+ UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
return {"0", Type::Int};
}
@@ -1331,7 +1330,7 @@ private:
GetSwizzle(element)),
Type::Float}};
}
- UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
+ UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute);
return std::nullopt;
}
}
@@ -2056,15 +2055,19 @@ private:
}
Expression Texture(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- std::string expr = GenerateTexture(
- operation, "", {TextureOffset{}, TextureArgument{Type::Float, meta->bias}});
- if (meta->sampler.is_shadow) {
- expr = "vec4(" + expr + ')';
+ const auto meta = std::get<MetaTexture>(operation.GetMeta());
+ const bool separate_dc = meta.sampler.type == TextureType::TextureCube &&
+ meta.sampler.is_array && meta.sampler.is_shadow;
+ // TODO: Replace this with an array and make GenerateTexture use C++20 std::span
+ const std::vector<TextureIR> extras{
+ TextureOffset{},
+ TextureArgument{Type::Float, meta.bias},
+ };
+ std::string expr = GenerateTexture(operation, "", extras, separate_dc);
+ if (meta.sampler.is_shadow) {
+ expr = fmt::format("vec4({})", expr);
}
- return {expr + GetSwizzle(meta->element), Type::Float};
+ return {expr + GetSwizzle(meta.element), Type::Float};
}
Expression TextureLod(Operation operation) {
@@ -2096,13 +2099,13 @@ private:
const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int;
const bool separate_dc = meta.sampler.is_shadow;
- std::vector<TextureIR> ir;
+ std::vector<TextureIR> ir_;
if (meta.sampler.is_shadow) {
- ir = {TextureOffset{}};
+ ir_ = {TextureOffset{}};
} else {
- ir = {TextureOffset{}, TextureArgument{type, meta.component}};
+ ir_ = {TextureOffset{}, TextureArgument{type, meta.component}};
}
- return {GenerateTexture(operation, "Gather", ir, separate_dc) + GetSwizzle(meta.element),
+ return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element),
Type::Float};
}
@@ -2748,11 +2751,11 @@ private:
}
}
- std::string GetSampler(const Sampler& sampler) const {
+ std::string GetSampler(const SamplerEntry& sampler) const {
return AppendSuffix(sampler.index, "sampler");
}
- std::string GetImage(const Image& image) const {
+ std::string GetImage(const ImageEntry& image) const {
return AppendSuffix(image.index, "image");
}
@@ -2797,7 +2800,7 @@ std::string GetFlowVariable(u32 index) {
class ExprDecompiler {
public:
- explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}
+ explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
void operator()(const ExprAnd& expr) {
inner += '(';
@@ -2852,7 +2855,7 @@ private:
class ASTDecompiler {
public:
- explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}
+ explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
void operator()(const ASTProgram& ast) {
ASTNode current = ast.nodes.GetFirst();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 451c9689a..be68994bb 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -20,13 +20,13 @@ namespace OpenGL {
class Device;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using SamplerEntry = VideoCommon::Shader::Sampler;
-using ImageEntry = VideoCommon::Shader::Image;
+using SamplerEntry = VideoCommon::Shader::SamplerEntry;
+using ImageEntry = VideoCommon::Shader::ImageEntry;
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
- explicit ConstBufferEntry(u32 max_offset, bool is_indirect, u32 index)
- : VideoCommon::Shader::ConstBuffer{max_offset, is_indirect}, index{index} {}
+ explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
+ : ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
u32 GetIndex() const {
return index;
@@ -37,10 +37,10 @@ private:
};
struct GlobalMemoryEntry {
- constexpr explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read,
- bool is_written)
- : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{
- is_written} {}
+ constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
+ bool is_written_)
+ : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
+ is_written_} {}
u32 cbuf_index = 0;
u32 cbuf_offset = 0;
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 166ee34e1..955b2abc4 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -317,8 +317,7 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo
return std::nullopt;
}
}
-
- return std::move(entries);
+ return entries;
}
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -344,7 +343,7 @@ void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
}
const u64 id = entry.unique_identifier;
- if (stored_transferable.find(id) != stored_transferable.end()) {
+ if (stored_transferable.contains(id)) {
// The shader already exists
return;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 691c6c79b..553e6e8d6 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() {
}
}
+void ProgramManager::BindHostCompute(GLuint program) {
+ if (use_assembly_programs) {
+ glDisable(GL_COMPUTE_PROGRAM_NV);
+ }
+ glUseProgram(program);
+ is_graphics_bound = false;
+}
+
+void ProgramManager::RestoreGuestCompute() {
+ if (use_assembly_programs) {
+ glEnable(GL_COMPUTE_PROGRAM_NV);
+ glUseProgram(0);
+ }
+}
+
void ProgramManager::UseVertexShader(GLuint program) {
if (use_assembly_programs) {
BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 950e0dfcb..ad42cce74 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,6 +45,12 @@ public:
/// Rewinds BindHostPipeline state changes.
void RestoreGuestPipeline();
+ /// Binds an OpenGL GLSL program object unsynchronized with the guest state.
+ void BindHostCompute(GLuint program);
+
+ /// Rewinds BindHostCompute state changes.
+ void RestoreGuestCompute();
+
void UseVertexShader(GLuint program);
void UseGeometryShader(GLuint program);
void UseFragmentShader(GLuint program);
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index 6bcf831f2..60e6fa39f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -13,7 +13,7 @@
#include "video_core/renderer_opengl/gl_state_tracker.h"
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
-#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32))
+#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
namespace OpenGL {
@@ -249,4 +249,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
}
}
+void StateTracker::InvalidateStreamBuffer() {
+ flags[Dirty::VertexBuffers] = true;
+ for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
+ flags[index] = true;
+ }
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 9d127548f..574615d3c 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -92,6 +92,8 @@ class StateTracker {
public:
explicit StateTracker(Tegra::GPU& gpu);
+ void InvalidateStreamBuffer();
+
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
return;
@@ -100,6 +102,14 @@ public:
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
}
+ void BindFramebuffer(GLuint new_framebuffer) {
+ if (framebuffer == new_framebuffer) {
+ return;
+ }
+ framebuffer = new_framebuffer;
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
+ }
+
void NotifyScreenDrawVertexArray() {
flags[OpenGL::Dirty::VertexFormats] = true;
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
@@ -129,9 +139,9 @@ public:
flags[OpenGL::Dirty::Scissor0] = true;
}
- void NotifyColorMask0() {
+ void NotifyColorMask(size_t index) {
flags[OpenGL::Dirty::ColorMasks] = true;
- flags[OpenGL::Dirty::ColorMask0] = true;
+ flags[OpenGL::Dirty::ColorMask0 + index] = true;
}
void NotifyBlend0() {
@@ -190,6 +200,7 @@ public:
private:
Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
+ GLuint framebuffer = 0;
GLuint index_buffer = 0;
};
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 887995cf4..e0819cdf2 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -9,6 +9,7 @@
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
@@ -16,24 +17,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
namespace OpenGL {
-OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
- : buffer_size(size) {
+OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
+ : state_tracker{state_tracker_} {
gl_buffer.Create();
- GLsizeiptr allocate_size = size;
- if (vertex_data_usage) {
- // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
- // read position is near the end and is an out-of-bound access to the vertex buffer. This is
- // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
- // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
- // crash.
- allocate_size *= 2;
- }
-
static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
- glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
+ glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
mapped_ptr = static_cast<u8*>(
- glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
+ glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
@@ -46,25 +37,24 @@ OGLStreamBuffer::~OGLStreamBuffer() {
gl_buffer.Release();
}
-std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
- ASSERT(size <= buffer_size);
- ASSERT(alignment <= buffer_size);
+std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
+ ASSERT(size <= BUFFER_SIZE);
+ ASSERT(alignment <= BUFFER_SIZE);
mapped_size = size;
if (alignment > 0) {
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
}
- bool invalidate = false;
- if (buffer_pos + size > buffer_size) {
+ if (buffer_pos + size > BUFFER_SIZE) {
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
glInvalidateBufferData(gl_buffer.handle);
+ state_tracker.InvalidateStreamBuffer();
buffer_pos = 0;
- invalidate = true;
}
- return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
+ return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
}
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index 307a67113..dd9cf67eb 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -4,29 +4,31 @@
#pragma once
-#include <tuple>
+#include <utility>
+
#include <glad/glad.h>
+
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class Device;
+class StateTracker;
class OGLStreamBuffer : private NonCopyable {
public:
- explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
+ explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
~OGLStreamBuffer();
/*
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
* and the optional alignment requirement.
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
- * The return values are the pointer to the new chunk, the offset within the buffer,
- * and the invalidation flag for previous chunks.
+ * The return values are the pointer to the new chunk, and the offset within the buffer.
* The actual used size must be specified on unmapping the chunk.
*/
- std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
+ std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
void Unmap(GLsizeiptr size);
@@ -39,15 +41,18 @@ public:
}
GLsizeiptr Size() const noexcept {
- return buffer_size;
+ return BUFFER_SIZE;
}
private:
+ static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
+
+ StateTracker& state_tracker;
+
OGLBuffer gl_buffer;
GLuint64EXT gpu_address = 0;
GLintptr buffer_pos = 0;
- GLsizeiptr buffer_size = 0;
GLsizeiptr mapped_size = 0;
u8* mapped_ptr = nullptr;
};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index a863ef218..546cb6d00 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -2,37 +2,60 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include "common/assert.h"
-#include "common/bit_util.h"
-#include "common/common_types.h"
-#include "common/microprofile.h"
-#include "common/scope_exit.h"
-#include "core/core.h"
-#include "video_core/morton.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include <algorithm>
+#include <array>
+#include <bit>
+#include <string>
+
+#include <glad/glad.h>
+
+#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/renderer_opengl/utils.h"
-#include "video_core/texture_cache/surface_base.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
+#include "video_core/renderer_opengl/util_shaders.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/format_lookup_table.h"
+#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/texture_cache.h"
-#include "video_core/textures/convert.h"
-#include "video_core/textures/texture.h"
+#include "video_core/textures/decoders.h"
namespace OpenGL {
-using Tegra::Texture::SwizzleSource;
-using VideoCore::MortonSwizzleMode;
+namespace {
+using Tegra::Texture::SwizzleSource;
+using Tegra::Texture::TextureMipmapFilter;
+using Tegra::Texture::TextureType;
+using Tegra::Texture::TICEntry;
+using Tegra::Texture::TSCEntry;
+using VideoCommon::CalculateLevelStrideAlignment;
+using VideoCommon::ImageCopy;
+using VideoCommon::ImageFlagBits;
+using VideoCommon::ImageType;
+using VideoCommon::NUM_RT;
+using VideoCommon::SamplesLog2;
+using VideoCommon::SwizzleParameters;
+using VideoCore::Surface::BytesPerBlock;
+using VideoCore::Surface::IsPixelFormatASTC;
+using VideoCore::Surface::IsPixelFormatSRGB;
+using VideoCore::Surface::MaxPixelFormat;
using VideoCore::Surface::PixelFormat;
-using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
-MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
-MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
-MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
- MP_RGB(128, 192, 128));
+struct CopyOrigin {
+ GLint level;
+ GLint x;
+ GLint y;
+ GLint z;
+};
-namespace {
+struct CopyRegion {
+ GLsizei width;
+ GLsizei height;
+ GLsizei depth;
+};
struct FormatTuple {
GLenum internal_format;
@@ -40,7 +63,7 @@ struct FormatTuple {
GLenum type = GL_NONE;
};
-constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
+constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
{GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
{GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
@@ -103,72 +126,113 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
{GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
- // Compressed sRGB formats
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
- {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
- {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
- {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
- {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
- {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
-
- // Depth formats
- {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
- {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
-
- // DepthStencil formats
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
+ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
+ {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
+ {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
+ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
}};
+constexpr std::array ACCELERATED_FORMATS{
+ GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F,
+ GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI,
+ GL_RG16UI, GL_RG8UI, GL_R32UI, GL_R16UI, GL_R8UI, GL_RGBA32I,
+ GL_RGBA16I, GL_RGBA8I, GL_RG32I, GL_RG16I, GL_RG8I, GL_R32I,
+ GL_R16I, GL_R8I, GL_RGBA16, GL_RGB10_A2, GL_RGBA8, GL_RG16,
+ GL_RG8, GL_R16, GL_R8, GL_RGBA16_SNORM, GL_RGBA8_SNORM, GL_RG16_SNORM,
+ GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
+};
+
const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
- ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
- return tex_format_tuples[static_cast<std::size_t>(pixel_format)];
+ ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
+ return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
}
-GLenum GetTextureTarget(const SurfaceTarget& target) {
- switch (target) {
- case SurfaceTarget::TextureBuffer:
+GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
+ switch (info.type) {
+ case ImageType::e1D:
+ return GL_TEXTURE_1D_ARRAY;
+ case ImageType::e2D:
+ if (info.num_samples > 1) {
+ return GL_TEXTURE_2D_MULTISAMPLE_ARRAY;
+ }
+ return GL_TEXTURE_2D_ARRAY;
+ case ImageType::e3D:
+ return GL_TEXTURE_3D;
+ case ImageType::Linear:
+ return GL_TEXTURE_2D_ARRAY;
+ case ImageType::Buffer:
return GL_TEXTURE_BUFFER;
- case SurfaceTarget::Texture1D:
+ }
+ UNREACHABLE_MSG("Invalid image type={}", info.type);
+ return GL_NONE;
+}
+
+GLenum ImageTarget(ImageViewType type, int num_samples = 1) {
+ const bool is_multisampled = num_samples > 1;
+ switch (type) {
+ case ImageViewType::e1D:
return GL_TEXTURE_1D;
- case SurfaceTarget::Texture2D:
- return GL_TEXTURE_2D;
- case SurfaceTarget::Texture3D:
+ case ImageViewType::e2D:
+ return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
+ case ImageViewType::Cube:
+ return GL_TEXTURE_CUBE_MAP;
+ case ImageViewType::e3D:
return GL_TEXTURE_3D;
- case SurfaceTarget::Texture1DArray:
+ case ImageViewType::e1DArray:
return GL_TEXTURE_1D_ARRAY;
- case SurfaceTarget::Texture2DArray:
- return GL_TEXTURE_2D_ARRAY;
- case SurfaceTarget::TextureCubemap:
- return GL_TEXTURE_CUBE_MAP;
- case SurfaceTarget::TextureCubeArray:
+ case ImageViewType::e2DArray:
+ return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
+ case ImageViewType::CubeArray:
return GL_TEXTURE_CUBE_MAP_ARRAY;
+ case ImageViewType::Rect:
+ return GL_TEXTURE_RECTANGLE;
+ case ImageViewType::Buffer:
+ return GL_TEXTURE_BUFFER;
}
- UNREACHABLE();
- return {};
+ UNREACHABLE_MSG("Invalid image view type={}", type);
+ return GL_NONE;
}
-GLint GetSwizzleSource(SwizzleSource source) {
+GLenum TextureMode(PixelFormat format, bool is_first) {
+ switch (format) {
+ case PixelFormat::D24_UNORM_S8_UINT:
+ case PixelFormat::D32_FLOAT_S8_UINT:
+ return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
+ case PixelFormat::S8_UINT_D24_UNORM:
+ return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
+ default:
+ UNREACHABLE();
+ return GL_DEPTH_COMPONENT;
+ }
+}
+
+GLint Swizzle(SwizzleSource source) {
switch (source) {
case SwizzleSource::Zero:
return GL_ZERO;
@@ -184,531 +248,813 @@ GLint GetSwizzleSource(SwizzleSource source) {
case SwizzleSource::OneFloat:
return GL_ONE;
}
- UNREACHABLE();
+ UNREACHABLE_MSG("Invalid swizzle source={}", source);
return GL_NONE;
}
-GLenum GetComponent(PixelFormat format, bool is_first) {
- switch (format) {
- case PixelFormat::D24_UNORM_S8_UINT:
- case PixelFormat::D32_FLOAT_S8_UINT:
- return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
- case PixelFormat::S8_UINT_D24_UNORM:
- return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
+GLenum AttachmentType(PixelFormat format) {
+ switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) {
+ case SurfaceType::Depth:
+ return GL_DEPTH_ATTACHMENT;
+ case SurfaceType::DepthStencil:
+ return GL_DEPTH_STENCIL_ATTACHMENT;
default:
- UNREACHABLE();
- return GL_DEPTH_COMPONENT;
+ UNIMPLEMENTED_MSG("Unimplemented type={}", type);
+ return GL_NONE;
}
}
-void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
- if (params.IsBuffer()) {
- return;
+[[nodiscard]] bool IsConverted(const Device& device, PixelFormat format, ImageType type) {
+ if (!device.HasASTC() && IsPixelFormatASTC(format)) {
+ return true;
}
- glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
- glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
- glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
- glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1));
- if (params.num_levels == 1) {
- glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f);
+ switch (format) {
+ case PixelFormat::BC4_UNORM:
+ case PixelFormat::BC5_UNORM:
+ return type == ImageType::e3D;
+ default:
+ break;
}
+ return false;
}
-OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format,
- OGLBuffer& texture_buffer) {
- OGLTexture texture;
- texture.Create(target);
+[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) {
+ switch (value) {
+ case SwizzleSource::G:
+ return SwizzleSource::R;
+ default:
+ return value;
+ }
+}
- switch (params.target) {
- case SurfaceTarget::Texture1D:
- glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width);
- break;
- case SurfaceTarget::TextureBuffer:
- texture_buffer.Create();
- glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(),
- nullptr, GL_DYNAMIC_STORAGE_BIT);
- glTextureBuffer(texture.handle, internal_format, texture_buffer.handle);
+void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4> swizzle) {
+ switch (format) {
+ case PixelFormat::D24_UNORM_S8_UINT:
+ case PixelFormat::D32_FLOAT_S8_UINT:
+ case PixelFormat::S8_UINT_D24_UNORM:
+ UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G);
+ glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
+ TextureMode(format, swizzle[0] == SwizzleSource::R));
+ std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
break;
- case SurfaceTarget::Texture2D:
- case SurfaceTarget::TextureCubemap:
- glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width,
- params.height);
+ default:
break;
- case SurfaceTarget::Texture3D:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubeArray:
- glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width,
- params.height, params.depth);
+ }
+ std::array<GLint, 4> gl_swizzle;
+ std::ranges::transform(swizzle, gl_swizzle.begin(), Swizzle);
+ glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
+}
+
+[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime,
+ const VideoCommon::ImageInfo& info) {
+ // Disable accelerated uploads for now as they don't implement swizzled uploads
+ return false;
+ switch (info.type) {
+ case ImageType::e2D:
+ case ImageType::e3D:
+ case ImageType::Linear:
break;
default:
- UNREACHABLE();
+ return false;
+ }
+ const GLenum internal_format = GetFormatTuple(info.format).internal_format;
+ const auto& format_info = runtime.FormatInfo(info.type, internal_format);
+ if (format_info.is_compressed) {
+ return false;
+ }
+ if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) {
+ return false;
}
+ if (format_info.compatibility_by_size) {
+ return true;
+ }
+ const GLenum store_format = StoreFormat(BytesPerBlock(info.format));
+ const GLenum store_class = runtime.FormatInfo(info.type, store_format).compatibility_class;
+ return format_info.compatibility_class == store_class;
+}
- ApplyTextureDefaults(params, texture.handle);
+[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset,
+ VideoCommon::SubresourceLayers subresource, GLenum target) {
+ switch (target) {
+ case GL_TEXTURE_2D_ARRAY:
+ case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+ return CopyOrigin{
+ .level = static_cast<GLint>(subresource.base_level),
+ .x = static_cast<GLint>(offset.x),
+ .y = static_cast<GLint>(offset.y),
+ .z = static_cast<GLint>(subresource.base_layer),
+ };
+ case GL_TEXTURE_3D:
+ return CopyOrigin{
+ .level = static_cast<GLint>(subresource.base_level),
+ .x = static_cast<GLint>(offset.x),
+ .y = static_cast<GLint>(offset.y),
+ .z = static_cast<GLint>(offset.z),
+ };
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented copy target={}", target);
+ return CopyOrigin{.level = 0, .x = 0, .y = 0, .z = 0};
+ }
+}
- return texture;
+[[nodiscard]] CopyRegion MakeCopyRegion(VideoCommon::Extent3D extent,
+ VideoCommon::SubresourceLayers dst_subresource,
+ GLenum target) {
+ switch (target) {
+ case GL_TEXTURE_2D_ARRAY:
+ case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+ return CopyRegion{
+ .width = static_cast<GLsizei>(extent.width),
+ .height = static_cast<GLsizei>(extent.height),
+ .depth = static_cast<GLsizei>(dst_subresource.num_layers),
+ };
+ case GL_TEXTURE_3D:
+ return CopyRegion{
+ .width = static_cast<GLsizei>(extent.width),
+ .height = static_cast<GLsizei>(extent.height),
+ .depth = static_cast<GLsizei>(extent.depth),
+ };
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented copy target={}", target);
+ return CopyRegion{.width = 0, .height = 0, .depth = 0};
+ }
}
-constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source,
- SwizzleSource w_source) {
- return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
- (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
+void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
+ if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
+ const GLuint texture = image_view->DefaultHandle();
+ glNamedFramebufferTexture(fbo, attachment, texture, 0);
+ return;
+ }
+ const GLuint texture = image_view->Handle(ImageViewType::e3D);
+ if (image_view->range.extent.layers > 1) {
+ // TODO: OpenGL doesn't support rendering to a fixed number of slices
+ glNamedFramebufferTexture(fbo, attachment, texture, 0);
+ } else {
+ const u32 slice = image_view->range.base.layer;
+ glNamedFramebufferTextureLayer(fbo, attachment, texture, 0, slice);
+ }
}
} // Anonymous namespace
-CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
- bool is_astc_supported)
- : VideoCommon::SurfaceBase<View>(gpu_addr, params, is_astc_supported) {
- if (is_converted) {
- internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8;
- format = GL_RGBA;
- type = GL_UNSIGNED_BYTE;
- } else {
- const auto& tuple{GetFormatTuple(params.pixel_format)};
- internal_format = tuple.internal_format;
- format = tuple.format;
- type = tuple.type;
- is_compressed = params.IsCompressed();
- }
- target = GetTextureTarget(params.target);
- texture = CreateTexture(params, target, internal_format, texture_buffer);
- DecorateSurfaceName();
+ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_)
+ : span(map, size), sync{sync_}, handle{handle_} {}
- u32 num_layers = 1;
- if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
- num_layers = params.depth;
+ImageBufferMap::~ImageBufferMap() {
+ if (sync) {
+ sync->Create();
}
-
- main_view =
- CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
}
-CachedSurface::~CachedSurface() = default;
+TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
+ StateTracker& state_tracker_)
+ : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) {
+ static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
+ for (size_t i = 0; i < TARGETS.size(); ++i) {
+ const GLenum target = TARGETS[i];
+ for (const FormatTuple& tuple : FORMAT_TABLE) {
+ const GLenum format = tuple.internal_format;
+ GLint compat_class;
+ GLint compat_type;
+ GLint is_compressed;
+ glGetInternalformativ(target, format, GL_IMAGE_COMPATIBILITY_CLASS, 1, &compat_class);
+ glGetInternalformativ(target, format, GL_IMAGE_FORMAT_COMPATIBILITY_TYPE, 1,
+ &compat_type);
+ glGetInternalformativ(target, format, GL_TEXTURE_COMPRESSED, 1, &is_compressed);
+ const FormatProperties properties{
+ .compatibility_class = static_cast<GLenum>(compat_class),
+ .compatibility_by_size = compat_type == GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE,
+ .is_compressed = is_compressed == GL_TRUE,
+ };
+ format_properties[i].emplace(format, properties);
+ }
+ }
+ has_broken_texture_view_formats = device.HasBrokenTextureViewFormats();
+
+ null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
+ null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
+ null_image_3d.Create(GL_TEXTURE_3D);
+ null_image_rect.Create(GL_TEXTURE_RECTANGLE);
+ glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
+ glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
+ glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
+ glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
+
+ std::array<GLuint, 4> new_handles;
+ glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
+ null_image_view_1d.handle = new_handles[0];
+ null_image_view_2d.handle = new_handles[1];
+ null_image_view_2d_array.handle = new_handles[2];
+ null_image_view_cube.handle = new_handles[3];
+ glTextureView(null_image_view_1d.handle, GL_TEXTURE_1D, null_image_1d_array.handle, GL_R8, 0, 1,
+ 0, 1);
+ glTextureView(null_image_view_2d.handle, GL_TEXTURE_2D, null_image_cube_array.handle, GL_R8, 0,
+ 1, 0, 1);
+ glTextureView(null_image_view_2d_array.handle, GL_TEXTURE_2D_ARRAY,
+ null_image_cube_array.handle, GL_R8, 0, 1, 0, 1);
+ glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
+ GL_R8, 0, 1, 0, 6);
+ const std::array texture_handles{
+ null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
+ null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle,
+ null_image_view_2d_array.handle, null_image_view_cube.handle,
+ };
+ for (const GLuint handle : texture_handles) {
+ static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
+ glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
+ }
+ const auto set_view = [this](ImageViewType type, GLuint handle) {
+ if (device.HasDebuggingToolAttached()) {
+ const std::string name = fmt::format("NullImage {}", type);
+ glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
+ }
+ null_image_views[static_cast<size_t>(type)] = handle;
+ };
+ set_view(ImageViewType::e1D, null_image_view_1d.handle);
+ set_view(ImageViewType::e2D, null_image_view_2d.handle);
+ set_view(ImageViewType::Cube, null_image_view_cube.handle);
+ set_view(ImageViewType::e3D, null_image_3d.handle);
+ set_view(ImageViewType::e1DArray, null_image_1d_array.handle);
+ set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle);
+ set_view(ImageViewType::CubeArray, null_image_cube_array.handle);
+ set_view(ImageViewType::Rect, null_image_rect.handle);
+}
-void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
- MICROPROFILE_SCOPE(OpenGL_Texture_Download);
+TextureCacheRuntime::~TextureCacheRuntime() = default;
- if (params.IsBuffer()) {
- glGetNamedBufferSubData(texture_buffer.handle, 0,
- static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)),
- staging_buffer.data());
- return;
- }
+void TextureCacheRuntime::Finish() {
+ glFinish();
+}
- SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
+ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
+ return upload_buffers.RequestMap(size, true);
+}
- for (u32 level = 0; level < params.emulated_levels; ++level) {
- glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
- glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
- const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
+ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
+ return download_buffers.RequestMap(size, false);
+}
- u8* const mip_data = staging_buffer.data() + mip_offset;
- const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
- if (is_compressed) {
- glGetCompressedTextureImage(texture.handle, level, size, mip_data);
- } else {
- glGetTextureImage(texture.handle, level, format, type, size, mip_data);
- }
+void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
+ std::span<const ImageCopy> copies) {
+ const GLuint dst_name = dst_image.Handle();
+ const GLuint src_name = src_image.Handle();
+ const GLenum dst_target = ImageTarget(dst_image.info);
+ const GLenum src_target = ImageTarget(src_image.info);
+ for (const ImageCopy& copy : copies) {
+ const auto src_origin = MakeCopyOrigin(copy.src_offset, copy.src_subresource, src_target);
+ const auto dst_origin = MakeCopyOrigin(copy.dst_offset, copy.dst_subresource, dst_target);
+ const auto region = MakeCopyRegion(copy.extent, copy.dst_subresource, dst_target);
+ glCopyImageSubData(src_name, src_target, src_origin.level, src_origin.x, src_origin.y,
+ src_origin.z, dst_name, dst_target, dst_origin.level, dst_origin.x,
+ dst_origin.y, dst_origin.z, region.width, region.height, region.depth);
}
}
-void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
- MICROPROFILE_SCOPE(OpenGL_Texture_Upload);
- SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); });
- for (u32 level = 0; level < params.emulated_levels; ++level) {
- UploadTextureMipmap(level, staging_buffer);
+bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) {
+ if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
+ return false;
}
+ return true;
}
-void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) {
- glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
- glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
-
- const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
- const u8* buffer{staging_buffer.data() + mip_offset};
- if (is_compressed) {
- const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
- switch (params.target) {
- case SurfaceTarget::Texture2D:
- glCompressedTextureSubImage2D(texture.handle, level, 0, 0,
- static_cast<GLsizei>(params.GetMipWidth(level)),
- static_cast<GLsizei>(params.GetMipHeight(level)),
- internal_format, image_size, buffer);
- break;
- case SurfaceTarget::Texture3D:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubeArray:
- glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0,
- static_cast<GLsizei>(params.GetMipWidth(level)),
- static_cast<GLsizei>(params.GetMipHeight(level)),
- static_cast<GLsizei>(params.GetMipDepth(level)),
- internal_format, image_size, buffer);
- break;
- case SurfaceTarget::TextureCubemap: {
- const std::size_t layer_size{params.GetHostLayerSize(level)};
- for (std::size_t face = 0; face < params.depth; ++face) {
- glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
- static_cast<GLsizei>(params.GetMipWidth(level)),
- static_cast<GLsizei>(params.GetMipHeight(level)), 1,
- internal_format, static_cast<GLsizei>(layer_size),
- buffer);
- buffer += layer_size;
- }
- break;
- }
- default:
- UNREACHABLE();
- }
+void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src,
+ std::span<const ImageCopy> copies) {
+ if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
+ ASSERT(src.info.type == ImageType::e3D);
+ util_shaders.CopyBC4(dst, src, copies);
} else {
- switch (params.target) {
- case SurfaceTarget::Texture1D:
- glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type,
- buffer);
- break;
- case SurfaceTarget::TextureBuffer:
- ASSERT(level == 0);
- glNamedBufferSubData(texture_buffer.handle, 0,
- params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer);
- break;
- case SurfaceTarget::Texture1DArray:
- case SurfaceTarget::Texture2D:
- glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level),
- params.GetMipHeight(level), format, type, buffer);
- break;
- case SurfaceTarget::Texture3D:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubeArray:
- glTextureSubImage3D(
- texture.handle, level, 0, 0, 0, static_cast<GLsizei>(params.GetMipWidth(level)),
- static_cast<GLsizei>(params.GetMipHeight(level)),
- static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer);
- break;
- case SurfaceTarget::TextureCubemap:
- for (std::size_t face = 0; face < params.depth; ++face) {
- glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
- params.GetMipWidth(level), params.GetMipHeight(level), 1,
- format, type, buffer);
- buffer += params.GetHostLayerSize(level);
- }
- break;
- default:
- UNREACHABLE();
- }
+ UNREACHABLE();
}
}
-void CachedSurface::DecorateSurfaceName() {
- LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName());
-}
+void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
+ const std::array<Offset2D, 2>& dst_region,
+ const std::array<Offset2D, 2>& src_region,
+ Tegra::Engines::Fermi2D::Filter filter,
+ Tegra::Engines::Fermi2D::Operation operation) {
+ state_tracker.NotifyScissor0();
+ state_tracker.NotifyRasterizeEnable();
+ state_tracker.NotifyFramebufferSRGB();
-void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) {
- LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix);
+ ASSERT(dst->BufferBits() == src->BufferBits());
+
+ glEnable(GL_FRAMEBUFFER_SRGB);
+ glDisable(GL_RASTERIZER_DISCARD);
+ glDisablei(GL_SCISSOR_TEST, 0);
+
+ const GLbitfield buffer_bits = dst->BufferBits();
+ const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0;
+ const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
+ glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y,
+ src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y,
+ dst_region[1].x, dst_region[1].y, buffer_bits,
+ is_linear ? GL_LINEAR : GL_NEAREST);
}
-View CachedSurface::CreateView(const ViewParams& view_key) {
- return CreateViewInner(view_key, false);
+void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
+ size_t buffer_offset,
+ std::span<const SwizzleParameters> swizzles) {
+ switch (image.info.type) {
+ case ImageType::e2D:
+ return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles);
+ case ImageType::e3D:
+ return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles);
+ case ImageType::Linear:
+ return util_shaders.PitchUpload(image, map, buffer_offset, swizzles);
+ default:
+ UNREACHABLE();
+ break;
+ }
}
-View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) {
- auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy);
- views[view_key] = view;
- if (!is_proxy)
- view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++));
- return view;
+void TextureCacheRuntime::InsertUploadMemoryBarrier() {
+ glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
}
-CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
- bool is_proxy)
- : VideoCommon::ViewBase(params), surface{surface}, format{surface.internal_format},
- target{GetTextureTarget(params.target)}, is_proxy{is_proxy} {
- if (!is_proxy) {
- main_view = CreateTextureView();
+FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal_format) const {
+ switch (type) {
+ case ImageType::e1D:
+ return format_properties[0].at(internal_format);
+ case ImageType::e2D:
+ case ImageType::Linear:
+ return format_properties[1].at(internal_format);
+ case ImageType::e3D:
+ return format_properties[2].at(internal_format);
+ default:
+ UNREACHABLE();
+ return FormatProperties{};
}
}
-CachedSurfaceView::~CachedSurfaceView() = default;
+TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
+ : storage_flags{storage_flags_}, map_flags{map_flags_} {}
-void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const {
- ASSERT(params.num_levels == 1);
+TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default;
- if (params.target == SurfaceTarget::Texture3D) {
- if (params.num_layers > 1) {
- ASSERT(params.base_layer == 0);
- glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level);
- } else {
- glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle,
- params.base_level, params.base_layer);
- }
- return;
+ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size,
+ bool insert_fence) {
+ const size_t index = RequestBuffer(requested_size);
+ OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
+ return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync);
+}
+
+size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
+ if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
+ return *index;
}
- if (params.num_layers > 1) {
- UNIMPLEMENTED_IF(params.base_layer != 0);
- glFramebufferTexture(fb_target, attachment, GetTexture(), 0);
- return;
+ OGLBuffer& buffer = buffers.emplace_back();
+ buffer.Create();
+ glNamedBufferStorage(buffer.handle, requested_size, nullptr,
+ storage_flags | GL_MAP_PERSISTENT_BIT);
+ maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
+ map_flags | GL_MAP_PERSISTENT_BIT)));
+
+ syncs.emplace_back();
+ sizes.push_back(requested_size);
+
+ ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
+ maps.size() == sizes.size());
+
+ return buffers.size() - 1;
+}
+
+std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) {
+ size_t smallest_buffer = std::numeric_limits<size_t>::max();
+ std::optional<size_t> found;
+ const size_t num_buffers = sizes.size();
+ for (size_t index = 0; index < num_buffers; ++index) {
+ const size_t buffer_size = sizes[index];
+ if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
+ continue;
+ }
+ if (syncs[index].handle != 0) {
+ GLint status;
+ glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status);
+ if (status != GL_SIGNALED) {
+ continue;
+ }
+ syncs[index].Release();
+ }
+ smallest_buffer = buffer_size;
+ found = index;
}
+ return found;
+}
- const GLenum view_target = surface.GetTarget();
- const GLuint texture = surface.GetTexture();
- switch (surface.GetSurfaceParams().target) {
- case SurfaceTarget::Texture1D:
- glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level);
+Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
+ VAddr cpu_addr_)
+ : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) {
+ if (CanBeAccelerated(runtime, info)) {
+ flags |= ImageFlagBits::AcceleratedUpload;
+ }
+ if (IsConverted(runtime.device, info.format, info.type)) {
+ flags |= ImageFlagBits::Converted;
+ gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
+ gl_format = GL_RGBA;
+ gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+ } else {
+ const auto& tuple = GetFormatTuple(info.format);
+ gl_internal_format = tuple.internal_format;
+ gl_format = tuple.format;
+ gl_type = tuple.type;
+ }
+ const GLenum target = ImageTarget(info);
+ const GLsizei width = info.size.width;
+ const GLsizei height = info.size.height;
+ const GLsizei depth = info.size.depth;
+ const int max_host_mip_levels = std::bit_width(info.size.width);
+ const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);
+ const GLsizei num_layers = info.resources.layers;
+ const GLsizei num_samples = info.num_samples;
+
+ GLuint handle = 0;
+ if (target != GL_TEXTURE_BUFFER) {
+ texture.Create(target);
+ handle = texture.handle;
+ }
+ switch (target) {
+ case GL_TEXTURE_1D_ARRAY:
+ glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers);
break;
- case SurfaceTarget::Texture2D:
- glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level);
+ case GL_TEXTURE_2D_ARRAY:
+ glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers);
break;
- case SurfaceTarget::Texture1DArray:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubemap:
- case SurfaceTarget::TextureCubeArray:
- glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level,
- params.base_layer);
+ case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
+ // TODO: Where should 'fixedsamplelocations' come from?
+ const auto [samples_x, samples_y] = SamplesLog2(info.num_samples);
+ glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x,
+ height >> samples_y, num_layers, GL_FALSE);
+ break;
+ }
+ case GL_TEXTURE_RECTANGLE:
+ glTextureStorage2D(handle, num_levels, gl_internal_format, width, height);
+ break;
+ case GL_TEXTURE_3D:
+ glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
+ break;
+ case GL_TEXTURE_BUFFER:
+ buffer.Create();
+ glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
break;
default:
- UNIMPLEMENTED();
+ UNREACHABLE_MSG("Invalid target=0x{:x}", target);
+ break;
+ }
+ if (runtime.device.HasDebuggingToolAttached()) {
+ const std::string name = VideoCommon::Name(*this);
+ glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle,
+ static_cast<GLsizei>(name.size()), name.data());
}
}
-GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source,
- SwizzleSource z_source, SwizzleSource w_source) {
- if (GetSurfaceParams().IsBuffer()) {
- return GetTexture();
- }
- const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
- if (current_swizzle == new_swizzle) {
- return current_view;
- }
- current_swizzle = new_swizzle;
+void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::BufferImageCopy> copies) {
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle());
+ glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes);
- const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
- OGLTextureView& view = entry->second;
- if (!is_cache_miss) {
- current_view = view.handle;
- return view.handle;
- }
- view = CreateTextureView();
- current_view = view.handle;
+ glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
- std::array swizzle{x_source, y_source, z_source, w_source};
+ u32 current_row_length = std::numeric_limits<u32>::max();
+ u32 current_image_height = std::numeric_limits<u32>::max();
- switch (const PixelFormat format = GetSurfaceParams().pixel_format) {
- case PixelFormat::D24_UNORM_S8_UINT:
- case PixelFormat::D32_FLOAT_S8_UINT:
- case PixelFormat::S8_UINT_D24_UNORM:
- UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
- glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
- GetComponent(format, x_source == SwizzleSource::R));
-
- // Make sure we sample the first component
- std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) {
- return value == SwizzleSource::G ? SwizzleSource::R : value;
- });
- [[fallthrough]];
- default: {
- const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]),
- GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])};
- glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
- break;
- }
+ for (const VideoCommon::BufferImageCopy& copy : copies) {
+ if (current_row_length != copy.buffer_row_length) {
+ current_row_length = copy.buffer_row_length;
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length);
+ }
+ if (current_image_height != copy.buffer_image_height) {
+ current_image_height = copy.buffer_image_height;
+ glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height);
+ }
+ CopyBufferToImage(copy, buffer_offset);
}
- return view.handle;
}
-OGLTextureView CachedSurfaceView::CreateTextureView() const {
- OGLTextureView texture_view;
- texture_view.Create();
-
- if (target == GL_TEXTURE_3D) {
- glTextureView(texture_view.handle, target, surface.texture.handle, format,
- params.base_level, params.num_levels, 0, 1);
- } else {
- glTextureView(texture_view.handle, target, surface.texture.handle, format,
- params.base_level, params.num_levels, params.base_layer, params.num_layers);
+void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::BufferCopy> copies) {
+ for (const VideoCommon::BufferCopy& copy : copies) {
+ glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset,
+ copy.dst_offset, copy.size);
}
- ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
-
- return texture_view;
}
-TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer,
- Tegra::Engines::Maxwell3D& maxwell3d,
- Tegra::MemoryManager& gpu_memory, const Device& device,
- StateTracker& state_tracker_)
- : TextureCacheBase{rasterizer, maxwell3d, gpu_memory, device.HasASTC()}, state_tracker{
- state_tracker_} {
- src_framebuffer.Create();
- dst_framebuffer.Create();
-}
+void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::BufferImageCopy> copies) {
+ glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
-TextureCacheOpenGL::~TextureCacheOpenGL() = default;
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle());
+ glPixelStorei(GL_PACK_ALIGNMENT, 1);
-Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
- return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported);
-}
+ u32 current_row_length = std::numeric_limits<u32>::max();
+ u32 current_image_height = std::numeric_limits<u32>::max();
-void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
- const VideoCommon::CopyParams& copy_params) {
- const auto& src_params = src_surface->GetSurfaceParams();
- const auto& dst_params = dst_surface->GetSurfaceParams();
- if (src_params.type != dst_params.type) {
- // A fallback is needed
- return;
+ for (const VideoCommon::BufferImageCopy& copy : copies) {
+ if (current_row_length != copy.buffer_row_length) {
+ current_row_length = copy.buffer_row_length;
+ glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
+ }
+ if (current_image_height != copy.buffer_image_height) {
+ current_image_height = copy.buffer_image_height;
+ glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
+ }
+ CopyImageToBuffer(copy, buffer_offset);
}
- const auto src_handle = src_surface->GetTexture();
- const auto src_target = src_surface->GetTarget();
- const auto dst_handle = dst_surface->GetTexture();
- const auto dst_target = dst_surface->GetTarget();
- glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x,
- copy_params.source_y, copy_params.source_z, dst_handle, dst_target,
- copy_params.dest_level, copy_params.dest_x, copy_params.dest_y,
- copy_params.dest_z, copy_params.width, copy_params.height,
- copy_params.depth);
}
-void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
- const Tegra::Engines::Fermi2D::Config& copy_config) {
- const auto& src_params{src_view->GetSurfaceParams()};
- const auto& dst_params{dst_view->GetSurfaceParams()};
- UNIMPLEMENTED_IF(src_params.depth != 1);
- UNIMPLEMENTED_IF(dst_params.depth != 1);
-
- state_tracker.NotifyScissor0();
- state_tracker.NotifyFramebuffer();
- state_tracker.NotifyRasterizeEnable();
- state_tracker.NotifyFramebufferSRGB();
+void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) {
+ // Compressed formats don't have a pixel format or type
+ const bool is_compressed = gl_format == GL_NONE;
+ const void* const offset = reinterpret_cast<const void*>(copy.buffer_offset + buffer_offset);
- if (dst_params.srgb_conversion) {
- glEnable(GL_FRAMEBUFFER_SRGB);
- } else {
- glDisable(GL_FRAMEBUFFER_SRGB);
+ switch (info.type) {
+ case ImageType::e1D:
+ if (is_compressed) {
+ glCompressedTextureSubImage2D(texture.handle, copy.image_subresource.base_level,
+ copy.image_offset.x, copy.image_subresource.base_layer,
+ copy.image_extent.width,
+ copy.image_subresource.num_layers, gl_internal_format,
+ static_cast<GLsizei>(copy.buffer_size), offset);
+ } else {
+ glTextureSubImage2D(texture.handle, copy.image_subresource.base_level,
+ copy.image_offset.x, copy.image_subresource.base_layer,
+ copy.image_extent.width, copy.image_subresource.num_layers,
+ gl_format, gl_type, offset);
+ }
+ break;
+ case ImageType::e2D:
+ case ImageType::Linear:
+ if (is_compressed) {
+ glCompressedTextureSubImage3D(
+ texture.handle, copy.image_subresource.base_level, copy.image_offset.x,
+ copy.image_offset.y, copy.image_subresource.base_layer, copy.image_extent.width,
+ copy.image_extent.height, copy.image_subresource.num_layers, gl_internal_format,
+ static_cast<GLsizei>(copy.buffer_size), offset);
+ } else {
+ glTextureSubImage3D(texture.handle, copy.image_subresource.base_level,
+ copy.image_offset.x, copy.image_offset.y,
+ copy.image_subresource.base_layer, copy.image_extent.width,
+ copy.image_extent.height, copy.image_subresource.num_layers,
+ gl_format, gl_type, offset);
+ }
+ break;
+ case ImageType::e3D:
+ if (is_compressed) {
+ glCompressedTextureSubImage3D(
+ texture.handle, copy.image_subresource.base_level, copy.image_offset.x,
+ copy.image_offset.y, copy.image_offset.z, copy.image_extent.width,
+ copy.image_extent.height, copy.image_extent.depth, gl_internal_format,
+ static_cast<GLsizei>(copy.buffer_size), offset);
+ } else {
+ glTextureSubImage3D(texture.handle, copy.image_subresource.base_level,
+ copy.image_offset.x, copy.image_offset.y, copy.image_offset.z,
+ copy.image_extent.width, copy.image_extent.height,
+ copy.image_extent.depth, gl_format, gl_type, offset);
+ }
+ break;
+ default:
+ UNREACHABLE();
}
- glDisable(GL_RASTERIZER_DISCARD);
- glDisablei(GL_SCISSOR_TEST, 0);
-
- glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle);
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle);
-
- GLenum buffers = 0;
- if (src_params.type == SurfaceType::ColorTexture) {
- src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
-
- dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
-
- buffers = GL_COLOR_BUFFER_BIT;
- } else if (src_params.type == SurfaceType::Depth) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+}
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) {
+ const GLint x_offset = copy.image_offset.x;
+ const GLsizei width = copy.image_extent.width;
- buffers = GL_DEPTH_BUFFER_BIT;
- } else if (src_params.type == SurfaceType::DepthStencil) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER);
+ const GLint level = copy.image_subresource.base_level;
+ const GLsizei buffer_size = static_cast<GLsizei>(copy.buffer_size);
+ void* const offset = reinterpret_cast<void*>(copy.buffer_offset + buffer_offset);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+ GLint y_offset = 0;
+ GLint z_offset = 0;
+ GLsizei height = 1;
+ GLsizei depth = 1;
- buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+ switch (info.type) {
+ case ImageType::e1D:
+ y_offset = copy.image_subresource.base_layer;
+ height = copy.image_subresource.num_layers;
+ break;
+ case ImageType::e2D:
+ case ImageType::Linear:
+ y_offset = copy.image_offset.y;
+ z_offset = copy.image_subresource.base_layer;
+ height = copy.image_extent.height;
+ depth = copy.image_subresource.num_layers;
+ break;
+ case ImageType::e3D:
+ y_offset = copy.image_offset.y;
+ z_offset = copy.image_offset.z;
+ height = copy.image_extent.height;
+ depth = copy.image_extent.depth;
+ break;
+ default:
+ UNREACHABLE();
+ }
+ // Compressed formats don't have a pixel format or type
+ const bool is_compressed = gl_format == GL_NONE;
+ if (is_compressed) {
+ glGetCompressedTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width,
+ height, depth, buffer_size, offset);
+ } else {
+ glGetTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, height,
+ depth, gl_format, gl_type, buffer_size, offset);
}
-
- const Common::Rectangle<u32>& src_rect = copy_config.src_rect;
- const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect;
- const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
-
- glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top),
- static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom),
- static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top),
- static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom),
- buffers,
- is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);
}
-void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
- MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
- const auto& src_params = src_surface->GetSurfaceParams();
- const auto& dst_params = dst_surface->GetSurfaceParams();
- UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
+ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
+ ImageId image_id_, Image& image)
+ : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
+ const Device& device = runtime.device;
+ if (True(image.flags & ImageFlagBits::Converted)) {
+ internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
+ } else {
+ internal_format = GetFormatTuple(format).internal_format;
+ }
+ VideoCommon::SubresourceRange flatten_range = info.range;
+ std::array<GLuint, 2> handles;
+ stored_views.reserve(2);
- const auto source_format = GetFormatTuple(src_params.pixel_format);
- const auto dest_format = GetFormatTuple(dst_params.pixel_format);
+ switch (info.type) {
+ case ImageViewType::e1DArray:
+ flatten_range.extent.layers = 1;
+ [[fallthrough]];
+ case ImageViewType::e1D:
+ glGenTextures(2, handles.data());
+ SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range);
+ SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
+ break;
+ case ImageViewType::e2DArray:
+ flatten_range.extent.layers = 1;
+ [[fallthrough]];
+ case ImageViewType::e2D:
+ if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) {
+ // 2D and 2D array views on a 3D textures are used exclusively for render targets
+ ASSERT(info.range.extent.levels == 1);
+ const VideoCommon::SubresourceRange slice_range{
+ .base = {.level = info.range.base.level, .layer = 0},
+ .extent = {.levels = 1, .layers = 1},
+ };
+ glGenTextures(1, handles.data());
+ SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range);
+ break;
+ }
+ glGenTextures(2, handles.data());
+ SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
+ SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
+ break;
+ case ImageViewType::e3D:
+ glGenTextures(1, handles.data());
+ SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
+ break;
+ case ImageViewType::CubeArray:
+ flatten_range.extent.layers = 6;
+ [[fallthrough]];
+ case ImageViewType::Cube:
+ glGenTextures(2, handles.data());
+ SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range);
+ SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
+ break;
+ case ImageViewType::Rect:
+ glGenTextures(1, handles.data());
+ SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
+ break;
+ case ImageViewType::Buffer:
+ glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data());
+ SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range);
+ break;
+ }
+ default_handle = Handle(info.type);
+}
- const std::size_t source_size = src_surface->GetHostSizeInBytes();
- const std::size_t dest_size = dst_surface->GetHostSizeInBytes();
+ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
+ : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
- const std::size_t buffer_size = std::max(source_size, dest_size);
+void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type,
+ GLuint handle, const VideoCommon::ImageViewInfo& info,
+ VideoCommon::SubresourceRange view_range) {
+ if (info.type == ImageViewType::Buffer) {
+ // TODO: Take offset from buffer cache
+ glTextureBufferRange(handle, internal_format, image.buffer.handle, 0,
+ image.guest_size_bytes);
+ } else {
+ const GLuint parent = image.texture.handle;
+ const GLenum target = ImageTarget(view_type, image.info.num_samples);
+ glTextureView(handle, target, parent, internal_format, view_range.base.level,
+ view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
+ if (!info.IsRenderTarget()) {
+ ApplySwizzle(handle, format, info.Swizzle());
+ }
+ }
+ if (device.HasDebuggingToolAttached()) {
+ const std::string name = VideoCommon::Name(*this, view_type);
+ glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
+ }
+ stored_views.emplace_back().handle = handle;
+ views[static_cast<size_t>(view_type)] = handle;
+}
- GLuint copy_pbo_handle = FetchPBO(buffer_size);
+Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
+ const GLenum compare_mode = config.depth_compare_enabled ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE;
+ const GLenum compare_func = MaxwellToGL::DepthCompareFunc(config.depth_compare_func);
+ const GLenum mag = MaxwellToGL::TextureFilterMode(config.mag_filter, TextureMipmapFilter::None);
+ const GLenum min = MaxwellToGL::TextureFilterMode(config.min_filter, config.mipmap_filter);
+ const GLenum reduction_filter = MaxwellToGL::ReductionFilter(config.reduction_filter);
+ const GLint seamless = config.cubemap_interface_filtering ? GL_TRUE : GL_FALSE;
+
+ UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1);
+ UNIMPLEMENTED_IF(config.float_coord_normalization != 0);
+
+ sampler.Create();
+ const GLuint handle = sampler.handle;
+ glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u));
+ glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v));
+ glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p));
+ glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode);
+ glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func);
+ glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag);
+ glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min);
+ glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias());
+ glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod());
+ glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod());
+ glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
+
+ if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) {
+ glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy());
+ } else {
+ LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
+ }
+ if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) {
+ glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter);
+ } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) {
+ LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required");
+ }
+ if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) {
+ glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless);
+ } else if (seamless == GL_FALSE) {
+ // We default to false because it's more common
+ LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required");
+ }
+}
- glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
+Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
+ ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
+ // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of
+ // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared
+ // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with
+ // mismatching size, this is why core framebuffers are preferred.
+ GLuint handle;
+ glGenFramebuffers(1, &handle);
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, handle);
+
+ GLsizei num_buffers = 0;
+ std::array<GLenum, NUM_RT> gl_draw_buffers;
+ gl_draw_buffers.fill(GL_NONE);
+
+ for (size_t index = 0; index < color_buffers.size(); ++index) {
+ const ImageView* const image_view = color_buffers[index];
+ if (!image_view) {
+ continue;
+ }
+ buffer_bits |= GL_COLOR_BUFFER_BIT;
+ gl_draw_buffers[index] = GL_COLOR_ATTACHMENT0 + key.draw_buffers[index];
+ num_buffers = static_cast<GLsizei>(index + 1);
- if (src_surface->IsCompressed()) {
- glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
- nullptr);
- } else {
- glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type,
- static_cast<GLsizei>(source_size), nullptr);
+ const GLenum attachment = static_cast<GLenum>(GL_COLOR_ATTACHMENT0 + index);
+ AttachTexture(handle, attachment, image_view);
}
- glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
+ if (const ImageView* const image_view = depth_buffer; image_view) {
+ if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) {
+ buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+ } else {
+ buffer_bits |= GL_DEPTH_BUFFER_BIT;
+ }
+ const GLenum attachment = AttachmentType(image_view->format);
+ AttachTexture(handle, attachment, image_view);
+ }
- const GLsizei width = static_cast<GLsizei>(dst_params.width);
- const GLsizei height = static_cast<GLsizei>(dst_params.height);
- const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
- if (dst_surface->IsCompressed()) {
- LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
- UNREACHABLE();
+ if (num_buffers > 1) {
+ glNamedFramebufferDrawBuffers(handle, num_buffers, gl_draw_buffers.data());
+ } else if (num_buffers > 0) {
+ glNamedFramebufferDrawBuffer(handle, gl_draw_buffers[0]);
} else {
- switch (dst_params.target) {
- case SurfaceTarget::Texture1D:
- glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format,
- dest_format.type, nullptr);
- break;
- case SurfaceTarget::Texture2D:
- glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height,
- dest_format.format, dest_format.type, nullptr);
- break;
- case SurfaceTarget::Texture3D:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubeArray:
- glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
- dest_format.format, dest_format.type, nullptr);
- break;
- case SurfaceTarget::TextureCubemap:
- glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
- dest_format.format, dest_format.type, nullptr);
- break;
- default:
- LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
- static_cast<u32>(dst_params.target));
- UNREACHABLE();
- }
+ glNamedFramebufferDrawBuffer(handle, GL_NONE);
}
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
- glTextureBarrier();
-}
+ glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_WIDTH, key.size.width);
+ glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_HEIGHT, key.size.height);
+ // TODO
+ // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_LAYERS, ...);
+ // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_SAMPLES, ...);
+ // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS, ...);
-GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) {
- ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; });
- const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size));
- OGLBuffer& cp = copy_pbo_cache[l2];
- if (cp.handle == 0) {
- const std::size_t ceil_size = 1ULL << l2;
- cp.Create();
- cp.MakeStreamCopy(ceil_size);
+ if (runtime.device.HasDebuggingToolAttached()) {
+ const std::string name = VideoCommon::Name(key);
+ glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data());
}
- return cp.handle;
+ framebuffer.handle = handle;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 7787134fc..15b7c3676 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -4,156 +4,251 @@
#pragma once
-#include <array>
-#include <functional>
#include <memory>
-#include <unordered_map>
-#include <utility>
-#include <vector>
+#include <span>
#include <glad/glad.h>
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/texture_cache.h"
namespace OpenGL {
-using VideoCommon::SurfaceParams;
-using VideoCommon::ViewParams;
-
-class CachedSurfaceView;
-class CachedSurface;
-class TextureCacheOpenGL;
+class Device;
+class ProgramManager;
class StateTracker;
-using Surface = std::shared_ptr<CachedSurface>;
-using View = std::shared_ptr<CachedSurfaceView>;
-using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
+class Framebuffer;
+class Image;
+class ImageView;
+class Sampler;
-class CachedSurface final : public VideoCommon::SurfaceBase<View> {
- friend CachedSurfaceView;
+using VideoCommon::ImageId;
+using VideoCommon::ImageViewId;
+using VideoCommon::ImageViewType;
+using VideoCommon::NUM_RT;
+using VideoCommon::Offset2D;
+using VideoCommon::RenderTargets;
+class ImageBufferMap {
public:
- explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params, bool is_astc_supported);
- ~CachedSurface();
-
- void UploadTexture(const std::vector<u8>& staging_buffer) override;
- void DownloadTexture(std::vector<u8>& staging_buffer) override;
+ explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
+ ~ImageBufferMap();
- GLenum GetTarget() const {
- return target;
+ GLuint Handle() const noexcept {
+ return handle;
}
- GLuint GetTexture() const {
- return texture.handle;
+ std::span<u8> Span() const noexcept {
+ return span;
}
- bool IsCompressed() const {
- return is_compressed;
+private:
+ std::span<u8> span;
+ OGLSync* sync;
+ GLuint handle;
+};
+
+struct FormatProperties {
+ GLenum compatibility_class;
+ bool compatibility_by_size;
+ bool is_compressed;
+};
+
+class TextureCacheRuntime {
+ friend Framebuffer;
+ friend Image;
+ friend ImageView;
+ friend Sampler;
+
+public:
+ explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
+ StateTracker& state_tracker);
+ ~TextureCacheRuntime();
+
+ void Finish();
+
+ ImageBufferMap MapUploadBuffer(size_t size);
+
+ ImageBufferMap MapDownloadBuffer(size_t size);
+
+ void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
+
+ void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
+ UNIMPLEMENTED();
}
-protected:
- void DecorateSurfaceName() override;
+ bool CanImageBeCopied(const Image& dst, const Image& src);
+
+ void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
+
+ void BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
+ const std::array<Offset2D, 2>& dst_region,
+ const std::array<Offset2D, 2>& src_region,
+ Tegra::Engines::Fermi2D::Filter filter,
+ Tegra::Engines::Fermi2D::Operation operation);
+
+ void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::SwizzleParameters> swizzles);
- View CreateView(const ViewParams& view_key) override;
- View CreateViewInner(const ViewParams& view_key, bool is_proxy);
+ void InsertUploadMemoryBarrier();
+
+ FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
+
+ bool HasBrokenTextureViewFormats() const noexcept {
+ return has_broken_texture_view_formats;
+ }
private:
- void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer);
+ struct StagingBuffers {
+ explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
+ ~StagingBuffers();
- GLenum internal_format{};
- GLenum format{};
- GLenum type{};
- bool is_compressed{};
- GLenum target{};
- u32 view_count{};
+ ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
- OGLTexture texture;
- OGLBuffer texture_buffer;
+ size_t RequestBuffer(size_t requested_size);
+
+ std::optional<size_t> FindBuffer(size_t requested_size);
+
+ std::vector<OGLSync> syncs;
+ std::vector<OGLBuffer> buffers;
+ std::vector<u8*> maps;
+ std::vector<size_t> sizes;
+ GLenum storage_flags;
+ GLenum map_flags;
+ };
+
+ const Device& device;
+ StateTracker& state_tracker;
+ UtilShaders util_shaders;
+
+ std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
+ bool has_broken_texture_view_formats = false;
+
+ StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
+ StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT};
+
+ OGLTexture null_image_1d_array;
+ OGLTexture null_image_cube_array;
+ OGLTexture null_image_3d;
+ OGLTexture null_image_rect;
+ OGLTextureView null_image_view_1d;
+ OGLTextureView null_image_view_2d;
+ OGLTextureView null_image_view_2d_array;
+ OGLTextureView null_image_view_cube;
+
+ std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
};
-class CachedSurfaceView final : public VideoCommon::ViewBase {
+class Image : public VideoCommon::ImageBase {
+ friend ImageView;
+
public:
- explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
- ~CachedSurfaceView();
+ explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
+ VAddr cpu_addr);
- /// @brief Attaches this texture view to the currently bound fb_target framebuffer
- /// @param attachment Attachment to bind textures to
- /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
- void Attach(GLenum attachment, GLenum fb_target) const;
+ void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::BufferImageCopy> copies);
- GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source);
+ void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::BufferCopy> copies);
- void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix);
+ void DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::BufferImageCopy> copies);
- void MarkAsModified(u64 tick) {
- surface.MarkAsModified(true, tick);
+ GLuint Handle() const noexcept {
+ return texture.handle;
}
- GLuint GetTexture() const {
- if (is_proxy) {
- return surface.GetTexture();
- }
- return main_view.handle;
+private:
+ void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
+
+ void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
+
+ OGLTexture texture;
+ OGLTextureView store_view;
+ OGLBuffer buffer;
+ GLenum gl_internal_format = GL_NONE;
+ GLenum gl_format = GL_NONE;
+ GLenum gl_type = GL_NONE;
+};
+
+class ImageView : public VideoCommon::ImageViewBase {
+ friend Image;
+
+public:
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
+
+ [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
+ return views[static_cast<size_t>(query_type)];
}
- GLenum GetFormat() const {
- return format;
+ [[nodiscard]] GLuint DefaultHandle() const noexcept {
+ return default_handle;
}
- const SurfaceParams& GetSurfaceParams() const {
- return surface.GetSurfaceParams();
+ [[nodiscard]] GLenum Format() const noexcept {
+ return internal_format;
}
private:
- OGLTextureView CreateTextureView() const;
+ void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
+ const VideoCommon::ImageViewInfo& info,
+ VideoCommon::SubresourceRange view_range);
+
+ std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
+ std::vector<OGLTextureView> stored_views;
+ GLuint default_handle = 0;
+ GLenum internal_format = GL_NONE;
+};
+
+class ImageAlloc : public VideoCommon::ImageAllocBase {};
- CachedSurface& surface;
- const GLenum format;
- const GLenum target;
- const bool is_proxy;
+class Sampler {
+public:
+ explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
- std::unordered_map<u32, OGLTextureView> view_cache;
- OGLTextureView main_view;
+ GLuint Handle() const noexcept {
+ return sampler.handle;
+ }
- // Use an invalid default so it always fails the comparison test
- u32 current_swizzle = 0xffffffff;
- GLuint current_view = 0;
+private:
+ OGLSampler sampler;
};
-class TextureCacheOpenGL final : public TextureCacheBase {
+class Framebuffer {
public:
- explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer,
- Tegra::Engines::Maxwell3D& maxwell3d,
- Tegra::MemoryManager& gpu_memory, const Device& device,
- StateTracker& state_tracker);
- ~TextureCacheOpenGL();
-
-protected:
- Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
-
- void ImageCopy(Surface& src_surface, Surface& dst_surface,
- const VideoCommon::CopyParams& copy_params) override;
+ explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
+ ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
- void ImageBlit(View& src_view, View& dst_view,
- const Tegra::Engines::Fermi2D::Config& copy_config) override;
+ [[nodiscard]] GLuint Handle() const noexcept {
+ return framebuffer.handle;
+ }
- void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
+ [[nodiscard]] GLbitfield BufferBits() const noexcept {
+ return buffer_bits;
+ }
private:
- GLuint FetchPBO(std::size_t buffer_size);
-
- StateTracker& state_tracker;
+ OGLFramebuffer framebuffer;
+ GLbitfield buffer_bits = GL_NONE;
+};
- OGLFramebuffer src_framebuffer;
- OGLFramebuffer dst_framebuffer;
- std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
+struct TextureCacheParams {
+ static constexpr bool ENABLE_VALIDATION = true;
+ static constexpr bool FRAMEBUFFER_BLITS = true;
+ static constexpr bool HAS_EMULATED_COPIES = true;
+
+ using Runtime = OpenGL::TextureCacheRuntime;
+ using Image = OpenGL::Image;
+ using ImageAlloc = OpenGL::ImageAlloc;
+ using ImageView = OpenGL::ImageView;
+ using Sampler = OpenGL::Sampler;
+ using Framebuffer = OpenGL::Framebuffer;
};
+using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index a8be2aa37..cbccfdeb4 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -107,7 +107,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
case Maxwell::IndexFormat::UnsignedInt:
return GL_UNSIGNED_INT;
}
- UNREACHABLE_MSG("Invalid index_format={}", static_cast<u32>(index_format));
+ UNREACHABLE_MSG("Invalid index_format={}", index_format);
return {};
}
@@ -144,7 +144,7 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
case Maxwell::PrimitiveTopology::Patches:
return GL_PATCHES;
}
- UNREACHABLE_MSG("Invalid topology={}", static_cast<int>(topology));
+ UNREACHABLE_MSG("Invalid topology={}", topology);
return GL_POINTS;
}
@@ -172,8 +172,8 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
}
break;
}
- UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}",
- static_cast<u32>(filter_mode), static_cast<u32>(mipmap_filter_mode));
+ UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}", filter_mode,
+ mipmap_filter_mode);
return GL_NEAREST;
}
@@ -204,7 +204,7 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
return GL_MIRROR_CLAMP_TO_EDGE;
}
}
- UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
+ UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", wrap_mode);
return GL_REPEAT;
}
@@ -227,7 +227,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
case Tegra::Texture::DepthCompareFunc::Always:
return GL_ALWAYS;
}
- UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast<u32>(func));
+ UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", func);
return GL_GREATER;
}
@@ -249,7 +249,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
case Maxwell::Blend::Equation::MaxGL:
return GL_MAX;
}
- UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
+ UNIMPLEMENTED_MSG("Unimplemented blend equation={}", equation);
return GL_FUNC_ADD;
}
@@ -313,7 +313,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
return GL_ONE_MINUS_CONSTANT_ALPHA;
}
- UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
+ UNIMPLEMENTED_MSG("Unimplemented blend factor={}", factor);
return GL_ZERO;
}
@@ -333,7 +333,7 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
case Tegra::Texture::SwizzleSource::OneFloat:
return GL_ONE;
}
- UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(source));
+ UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", source);
return GL_ZERO;
}
@@ -364,7 +364,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
case Maxwell::ComparisonOp::AlwaysOld:
return GL_ALWAYS;
}
- UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
+ UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison);
return GL_ALWAYS;
}
@@ -395,7 +395,7 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
case Maxwell::StencilOp::DecrWrapOGL:
return GL_DECR_WRAP;
}
- UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil));
+ UNIMPLEMENTED_MSG("Unimplemented stencil op={}", stencil);
return GL_KEEP;
}
@@ -406,7 +406,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) {
case Maxwell::FrontFace::CounterClockWise:
return GL_CCW;
}
- UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast<u32>(front_face));
+ UNIMPLEMENTED_MSG("Unimplemented front face cull={}", front_face);
return GL_CCW;
}
@@ -419,7 +419,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) {
case Maxwell::CullFace::FrontAndBack:
return GL_FRONT_AND_BACK;
}
- UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
+ UNIMPLEMENTED_MSG("Unimplemented cull face={}", cull_face);
return GL_BACK;
}
@@ -458,7 +458,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
case Maxwell::LogicOperation::Set:
return GL_SET;
}
- UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(operation));
+ UNIMPLEMENTED_MSG("Unimplemented logic operation={}", operation);
return GL_COPY;
}
@@ -471,10 +471,23 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
case Maxwell::PolygonMode::Fill:
return GL_FILL;
}
- UNREACHABLE_MSG("Invalid polygon mode={}", static_cast<int>(polygon_mode));
+ UNREACHABLE_MSG("Invalid polygon mode={}", polygon_mode);
return GL_FILL;
}
+inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) {
+ switch (filter) {
+ case Tegra::Texture::SamplerReduction::WeightedAverage:
+ return GL_WEIGHTED_AVERAGE_ARB;
+ case Tegra::Texture::SamplerReduction::Min:
+ return GL_MIN;
+ case Tegra::Texture::SamplerReduction::Max:
+ return GL_MAX;
+ }
+ UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter));
+ return GL_WEIGHTED_AVERAGE_ARB;
+}
+
inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
// Enumeration order matches register order. We can convert it arithmetically.
return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 2ccca1993..dd77a543c 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -23,10 +23,10 @@
#include "core/telemetry_session.h"
#include "video_core/host_shaders/opengl_present_frag.h"
#include "video_core/host_shaders/opengl_present_vert.h"
-#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core/textures/decoders.h"
namespace OpenGL {
@@ -130,8 +130,8 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window_,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
- std::unique_ptr<Core::Frontend::GraphicsContext> context)
- : RendererBase{emu_window_, std::move(context)}, telemetry_session{telemetry_session_},
+ std::unique_ptr<Core::Frontend::GraphicsContext> context_)
+ : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {}
RendererOpenGL::~RendererOpenGL() = default;
@@ -140,19 +140,18 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (!framebuffer) {
return;
}
-
PrepareRendertarget(framebuffer);
RenderScreenshot();
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+ state_tracker.BindFramebuffer(0);
DrawScreen(emu_window.GetFramebufferLayout());
++m_current_frame;
rasterizer->TickFrame();
- render_window.PollEvents();
context->SwapBuffers();
+ render_window.OnFrameDisplayed();
}
void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
@@ -187,19 +186,20 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
// Reset the screen info's display texture to its own permanent texture
screen_info.display_texture = screen_info.texture.resource.handle;
- const auto pixel_format{
- VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
- const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
- const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
- u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
- rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
-
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
- VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
- framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
- gl_framebuffer_data.data(), host_ptr);
-
+ const auto pixel_format{
+ VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
+ const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
+ const u64 size_in_bytes{Tegra::Texture::CalculateSize(
+ true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
+ const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
+ const std::span<const u8> input_data(host_ptr, size_in_bytes);
+ Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
+ framebuffer.width, framebuffer.height, 1, block_height_log2,
+ 0);
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
// Update existing texture
@@ -238,6 +238,10 @@ void RendererOpenGL::InitOpenGLObjects() {
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
+ // Generate presentation sampler
+ present_sampler.Create();
+ glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+
// Generate VBO handle for drawing
vertex_buffer.Create();
@@ -255,6 +259,11 @@ void RendererOpenGL::InitOpenGLObjects() {
// Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
+ // Enable seamless cubemaps when per texture parameters are not available
+ if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
+ glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
+ }
+
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
if (device.HasVertexBufferUnifiedMemory()) {
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
@@ -275,9 +284,9 @@ void RendererOpenGL::AddTelemetryFields() {
LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
- telemetry_session.AddField(user_system, "GPU_Vendor", gpu_vendor);
- telemetry_session.AddField(user_system, "GPU_Model", gpu_model);
- telemetry_session.AddField(user_system, "GPU_OpenGL_Version", gl_version);
+ telemetry_session.AddField(user_system, "GPU_Vendor", std::string(gpu_vendor));
+ telemetry_session.AddField(user_system, "GPU_Model", std::string(gpu_model));
+ telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
}
void RendererOpenGL::CreateRasterizer() {
@@ -296,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
const auto pixel_format{
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
- const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
+ const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
GLint internal_format;
@@ -315,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
- UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
- static_cast<u32>(framebuffer.pixel_format));
+ // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
+ // static_cast<u32>(framebuffer.pixel_format));
}
texture.resource.Release();
@@ -348,7 +357,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
} else {
// Other transformations are unsupported
LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}",
- static_cast<u32>(framebuffer_transform_flags));
+ framebuffer_transform_flags);
UNIMPLEMENTED();
}
}
@@ -382,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
state_tracker.NotifyPolygonModes();
state_tracker.NotifyViewport0();
state_tracker.NotifyScissor0();
- state_tracker.NotifyColorMask0();
+ state_tracker.NotifyColorMask(0);
state_tracker.NotifyBlend0();
state_tracker.NotifyFramebuffer();
state_tracker.NotifyFrontFace();
@@ -440,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
}
glBindTextureUnit(0, screen_info.display_texture);
- glBindSampler(0, 0);
+ glBindSampler(0, present_sampler.handle);
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
@@ -473,6 +482,8 @@ void RendererOpenGL::RenderScreenshot() {
DrawScreen(layout);
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+ glPixelStorei(GL_PACK_ROW_LENGTH, 0);
glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
renderer_settings.screenshot_bits);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 9ef181f95..44e109794 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -57,10 +57,10 @@ struct ScreenInfo {
class RendererOpenGL final : public VideoCore::RendererBase {
public:
- explicit RendererOpenGL(Core::TelemetrySession& telemetry_session,
- Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory,
- Tegra::GPU& gpu,
- std::unique_ptr<Core::Frontend::GraphicsContext> context);
+ explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_,
+ Core::Frontend::EmuWindow& emu_window_,
+ Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererOpenGL() override;
bool Init() override;
@@ -102,6 +102,7 @@ private:
StateTracker state_tracker{gpu};
// OpenGL object IDs
+ OGLSampler present_sampler;
OGLBuffer vertex_buffer;
OGLProgram vertex_program;
OGLProgram fragment_program;
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
new file mode 100644
index 000000000..eb849cbf2
--- /dev/null
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -0,0 +1,224 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <bit>
+#include <span>
+#include <string_view>
+
+#include <glad/glad.h>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
+#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
+#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
+#include "video_core/host_shaders/pitch_unswizzle_comp.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
+#include "video_core/renderer_opengl/util_shaders.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/accelerated_swizzle.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/texture_cache/util.h"
+#include "video_core/textures/decoders.h"
+
+namespace OpenGL {
+
+using namespace HostShaders;
+
+using VideoCommon::Extent3D;
+using VideoCommon::ImageCopy;
+using VideoCommon::ImageType;
+using VideoCommon::SwizzleParameters;
+using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams;
+using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
+using VideoCore::Surface::BytesPerBlock;
+
+namespace {
+
+OGLProgram MakeProgram(std::string_view source) {
+ OGLShader shader;
+ shader.Create(source, GL_COMPUTE_SHADER);
+
+ OGLProgram program;
+ program.Create(true, false, shader.handle);
+ return program;
+}
+
+} // Anonymous namespace
+
+UtilShaders::UtilShaders(ProgramManager& program_manager_)
+ : program_manager{program_manager_},
+ block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
+ block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
+ pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
+ copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
+ const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
+ swizzle_table_buffer.Create();
+ glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
+}
+
+UtilShaders::~UtilShaders() = default;
+
+void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const SwizzleParameters> swizzles) {
+ static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
+ static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
+ static constexpr GLuint BINDING_INPUT_BUFFER = 1;
+ static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
+
+ program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
+ glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
+
+ const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
+ for (const SwizzleParameters& swizzle : swizzles) {
+ const Extent3D num_tiles = swizzle.num_tiles;
+ const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+
+ const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
+ const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
+
+ const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
+ glUniform3uiv(0, 1, params.origin.data());
+ glUniform3iv(1, 1, params.destination.data());
+ glUniform1ui(2, params.bytes_per_block_log2);
+ glUniform1ui(3, params.layer_stride);
+ glUniform1ui(4, params.block_size);
+ glUniform1ui(5, params.x_shift);
+ glUniform1ui(6, params.block_height);
+ glUniform1ui(7, params.block_height_mask);
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
+ input_offset, image.guest_size_bytes - swizzle.buffer_offset);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
+ GL_WRITE_ONLY, store_format);
+ glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
+ }
+ program_manager.RestoreGuestCompute();
+}
+
+void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const SwizzleParameters> swizzles) {
+ static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
+
+ static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
+ static constexpr GLuint BINDING_INPUT_BUFFER = 1;
+ static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
+
+ glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+ program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
+
+ const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
+ for (const SwizzleParameters& swizzle : swizzles) {
+ const Extent3D num_tiles = swizzle.num_tiles;
+ const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+
+ const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
+ const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
+ const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth);
+
+ const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info);
+ glUniform3uiv(0, 1, params.origin.data());
+ glUniform3iv(1, 1, params.destination.data());
+ glUniform1ui(2, params.bytes_per_block_log2);
+ glUniform1ui(3, params.slice_size);
+ glUniform1ui(4, params.block_size);
+ glUniform1ui(5, params.x_shift);
+ glUniform1ui(6, params.block_height);
+ glUniform1ui(7, params.block_height_mask);
+ glUniform1ui(8, params.block_depth);
+ glUniform1ui(9, params.block_depth_mask);
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
+ input_offset, image.guest_size_bytes - swizzle.buffer_offset);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
+ GL_WRITE_ONLY, store_format);
+ glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
+ }
+ program_manager.RestoreGuestCompute();
+}
+
+void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const SwizzleParameters> swizzles) {
+ static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
+ static constexpr GLuint BINDING_INPUT_BUFFER = 0;
+ static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
+ static constexpr GLuint LOC_ORIGIN = 0;
+ static constexpr GLuint LOC_DESTINATION = 1;
+ static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
+ static constexpr GLuint LOC_PITCH = 3;
+
+ const u32 bytes_per_block = BytesPerBlock(image.info.format);
+ const GLenum format = StoreFormat(bytes_per_block);
+ const u32 pitch = image.info.pitch;
+
+ UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
+ "Non-power of two images are not implemented");
+
+ program_manager.BindHostCompute(pitch_unswizzle_program.handle);
+ glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+ glUniform2ui(LOC_ORIGIN, 0, 0);
+ glUniform2i(LOC_DESTINATION, 0, 0);
+ glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
+ glUniform1ui(LOC_PITCH, pitch);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
+ for (const SwizzleParameters& swizzle : swizzles) {
+ const Extent3D num_tiles = swizzle.num_tiles;
+ const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+
+ const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
+ const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
+
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
+ input_offset, image.guest_size_bytes - swizzle.buffer_offset);
+ glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
+ }
+ program_manager.RestoreGuestCompute();
+}
+
+void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) {
+ static constexpr GLuint BINDING_INPUT_IMAGE = 0;
+ static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
+ static constexpr GLuint LOC_SRC_OFFSET = 0;
+ static constexpr GLuint LOC_DST_OFFSET = 1;
+
+ program_manager.BindHostCompute(copy_bc4_program.handle);
+
+ for (const ImageCopy& copy : copies) {
+ ASSERT(copy.src_subresource.base_layer == 0);
+ ASSERT(copy.src_subresource.num_layers == 1);
+ ASSERT(copy.dst_subresource.base_layer == 0);
+ ASSERT(copy.dst_subresource.num_layers == 1);
+
+ glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
+ glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
+ glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level,
+ GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(),
+ copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
+ glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
+ }
+ program_manager.RestoreGuestCompute();
+}
+
+GLenum StoreFormat(u32 bytes_per_block) {
+ switch (bytes_per_block) {
+ case 1:
+ return GL_R8UI;
+ case 2:
+ return GL_R16UI;
+ case 4:
+ return GL_R32UI;
+ case 8:
+ return GL_RG32UI;
+ case 16:
+ return GL_RGBA32UI;
+ }
+ UNREACHABLE();
+ return GL_R8UI;
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
new file mode 100644
index 000000000..359997255
--- /dev/null
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -0,0 +1,51 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <span>
+
+#include <glad/glad.h>
+
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/texture_cache/types.h"
+
+namespace OpenGL {
+
+class Image;
+class ImageBufferMap;
+class ProgramManager;
+
+class UtilShaders {
+public:
+ explicit UtilShaders(ProgramManager& program_manager);
+ ~UtilShaders();
+
+ void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::SwizzleParameters> swizzles);
+
+ void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::SwizzleParameters> swizzles);
+
+ void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ std::span<const VideoCommon::SwizzleParameters> swizzles);
+
+ void CopyBC4(Image& dst_image, Image& src_image,
+ std::span<const VideoCommon::ImageCopy> copies);
+
+private:
+ ProgramManager& program_manager;
+
+ OGLBuffer swizzle_table_buffer;
+
+ OGLProgram block_linear_unswizzle_2d_program;
+ OGLProgram block_linear_unswizzle_3d_program;
+ OGLProgram pitch_unswizzle_program;
+ OGLProgram copy_bc4_program;
+};
+
+GLenum StoreFormat(u32 bytes_per_block);
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
deleted file mode 100644
index 6d7bb16b2..000000000
--- a/src/video_core/renderer_opengl/utils.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <string>
-#include <vector>
-
-#include <fmt/format.h>
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_state_tracker.h"
-#include "video_core/renderer_opengl/utils.h"
-
-namespace OpenGL {
-
-void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
- if (!GLAD_GL_KHR_debug) {
- // We don't need to throw an error as this is just for debugging
- return;
- }
-
- std::string object_label;
- if (extra_info.empty()) {
- switch (identifier) {
- case GL_TEXTURE:
- object_label = fmt::format("Texture@0x{:016X}", addr);
- break;
- case GL_PROGRAM:
- object_label = fmt::format("Shader@0x{:016X}", addr);
- break;
- default:
- object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr);
- break;
- }
- } else {
- object_label = fmt::format("{}@0x{:016X}", extra_info, addr);
- }
- glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
deleted file mode 100644
index 9c09ee12c..000000000
--- a/src/video_core/renderer_opengl/utils.h
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <string_view>
-#include <vector>
-#include <glad/glad.h>
-#include "common/common_types.h"
-
-namespace OpenGL {
-
-void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
-
-} // namespace OpenGL