diff options
Diffstat (limited to 'src/video_core')
18 files changed, 619 insertions, 155 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f5ae57039..09ecc5bad 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -27,6 +27,8 @@ add_library(video_core STATIC renderer_base.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h + renderer_opengl/gl_primitive_assembler.cpp + renderer_opengl/gl_primitive_assembler.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_rasterizer_cache.cpp diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index ea1555c5d..912e785b9 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -4,11 +4,13 @@ #include "core/memory.h" #include "video_core/engines/fermi_2d.h" +#include "video_core/rasterizer_interface.h" #include "video_core/textures/decoders.h" namespace Tegra::Engines { -Fermi2D::Fermi2D(MemoryManager& memory_manager) : memory_manager(memory_manager) {} +Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) + : memory_manager(memory_manager), rasterizer{rasterizer} {} void Fermi2D::WriteReg(u32 method, u32 value) { ASSERT_MSG(method < Regs::NUM_REGS, @@ -44,27 +46,31 @@ void Fermi2D::HandleSurfaceCopy() { u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format); u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); - if (regs.src.linear == regs.dst.linear) { - // If the input layout and the output layout are the same, just perform a raw copy. - ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight()); - Memory::CopyBlock(dest_cpu, source_cpu, - src_bytes_per_pixel * regs.dst.width * regs.dst.height); - return; - } - - u8* src_buffer = Memory::GetPointer(source_cpu); - u8* dst_buffer = Memory::GetPointer(dest_cpu); - - if (!regs.src.linear && regs.dst.linear) { - // If the input is tiled and the output is linear, deswizzle the input and copy it over. 
- Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, - dst_bytes_per_pixel, src_buffer, dst_buffer, true, - regs.src.BlockHeight()); - } else { - // If the input is linear and the output is tiled, swizzle the input and copy it over. - Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, - dst_bytes_per_pixel, dst_buffer, src_buffer, false, - regs.dst.BlockHeight()); + if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { + // TODO(bunnei): The below implementation currently will not get hit, as + // AccelerateSurfaceCopy tries to always copy and will always return success. This should be + // changed once we properly support flushing. + + if (regs.src.linear == regs.dst.linear) { + // If the input layout and the output layout are the same, just perform a raw copy. + ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight()); + Memory::CopyBlock(dest_cpu, source_cpu, + src_bytes_per_pixel * regs.dst.width * regs.dst.height); + return; + } + u8* src_buffer = Memory::GetPointer(source_cpu); + u8* dst_buffer = Memory::GetPointer(dest_cpu); + if (!regs.src.linear && regs.dst.linear) { + // If the input is tiled and the output is linear, deswizzle the input and copy it over. + Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, + dst_bytes_per_pixel, src_buffer, dst_buffer, true, + regs.src.BlockHeight()); + } else { + // If the input is linear and the output is tiled, swizzle the input and copy it over. 
+ Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, + dst_bytes_per_pixel, dst_buffer, src_buffer, false, + regs.dst.BlockHeight()); + } } } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 021b83eaa..81d15c62a 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -12,6 +12,10 @@ #include "video_core/gpu.h" #include "video_core/memory_manager.h" +namespace VideoCore { +class RasterizerInterface; +} + namespace Tegra::Engines { #define FERMI2D_REG_INDEX(field_name) \ @@ -19,7 +23,7 @@ namespace Tegra::Engines { class Fermi2D final { public: - explicit Fermi2D(MemoryManager& memory_manager); + explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); ~Fermi2D() = default; /// Write the value to the register identified by method. @@ -94,6 +98,8 @@ public: MemoryManager& memory_manager; private: + VideoCore::RasterizerInterface& rasterizer; + /// Performs the copy from the source surface to the destination surface as configured in the /// registers. void HandleSurfaceCopy(); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 9f5581045..4290da33f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -744,6 +744,12 @@ public: return static_cast<GPUVAddr>((static_cast<GPUVAddr>(end_addr_high) << 32) | end_addr_low); } + + /// Adjust the index buffer offset so it points to the first desired index. 
+ GPUVAddr IndexStart() const { + return StartAddress() + static_cast<size_t>(first) * + static_cast<size_t>(FormatSizeInBytes()); + } } index_array; INSERT_PADDING_WORDS(0x7); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index baa8b63b7..9ba7e3533 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -25,7 +25,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { memory_manager = std::make_unique<Tegra::MemoryManager>(); maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); - fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager); + fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); maxwell_dma = std::make_unique<Engines::MaxwellDMA>(*memory_manager); kepler_memory = std::make_unique<Engines::KeplerMemory>(*memory_manager); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index cd819d69f..06fc59dbe 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" @@ -33,13 +34,9 @@ public: /// and invalidated virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; - /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 - virtual bool AccelerateDisplayTransfer(const void* config) { - return false; - } - - /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 1 - virtual bool AccelerateTextureCopy(const void* config) { + /// Attempt to use a faster method to perform a surface copy + virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, + const Tegra::Engines::Fermi2D::Regs::Surface& dst) { return false; } 
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 578aca789..c142095c5 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -34,7 +34,7 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size } AlignBuffer(alignment); - GLintptr uploaded_offset = buffer_offset; + const GLintptr uploaded_offset = buffer_offset; Memory::ReadBlock(*cpu_addr, buffer_ptr, size); @@ -57,13 +57,23 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t s std::size_t alignment) { AlignBuffer(alignment); std::memcpy(buffer_ptr, raw_pointer, size); - GLintptr uploaded_offset = buffer_offset; + const GLintptr uploaded_offset = buffer_offset; buffer_ptr += size; buffer_offset += size; return uploaded_offset; } +std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::size_t alignment) { + AlignBuffer(alignment); + u8* const uploaded_ptr = buffer_ptr; + const GLintptr uploaded_offset = buffer_offset; + + buffer_ptr += size; + buffer_offset += size; + return std::make_tuple(uploaded_ptr, uploaded_offset); +} + void OGLBufferCache::Map(std::size_t max_size) { bool invalidate; std::tie(buffer_ptr, buffer_offset_base, invalidate) = @@ -74,6 +84,7 @@ void OGLBufferCache::Map(std::size_t max_size) { InvalidateAll(); } } + void OGLBufferCache::Unmap() { stream_buffer.Unmap(buffer_offset - buffer_offset_base); } @@ -84,7 +95,7 @@ GLuint OGLBufferCache::GetHandle() const { void OGLBufferCache::AlignBuffer(std::size_t alignment) { // Align the offset, not the mapped pointer - GLintptr offset_aligned = + const GLintptr offset_aligned = static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); buffer_ptr += offset_aligned - buffer_offset; buffer_offset = offset_aligned; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h 
b/src/video_core/renderer_opengl/gl_buffer_cache.h index 6c18461f4..965976334 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -6,6 +6,7 @@ #include <cstddef> #include <memory> +#include <tuple> #include "common/common_types.h" #include "video_core/rasterizer_cache.h" @@ -33,11 +34,17 @@ class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBuffer public: explicit OGLBufferCache(std::size_t size); + /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been + /// allocated. GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, bool cache = true); + /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); + /// Reserves memory to be used by host's CPU. Returns mapped address and offset. + std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); + void Map(std::size_t max_size); void Unmap(); diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp new file mode 100644 index 000000000..ee1d9601b --- /dev/null +++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp @@ -0,0 +1,64 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <cstring>
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/memory.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_primitive_assembler.h"
+
+namespace OpenGL {
+
+// Number of indices emitted per quad (two triangles); despite its name this is not a triangle
+// count.
+constexpr u32 TRIANGLES_PER_QUAD = 6;
+// Corner order used to split a quad (corners 0..3) into the triangles (0, 1, 2) and (0, 2, 3).
+constexpr std::array<u32, TRIANGLES_PER_QUAD> QUAD_MAP = {0, 1, 2, 0, 2, 3};
+
+PrimitiveAssembler::PrimitiveAssembler(OGLBufferCache& buffer_cache) : buffer_cache(buffer_cache) {}
+
+PrimitiveAssembler::~PrimitiveAssembler() = default;
+
+/// Returns the number of bytes reserved for the triangle list built from `count` quad vertices
+/// (or quad indices). Every generated index is budgeted as a GLuint.
+std::size_t PrimitiveAssembler::CalculateQuadSize(u32 count) const {
+    ASSERT_MSG(count % 4 == 0, "Quad count is expected to be a multiple of 4");
+    return (count / 4) * TRIANGLES_PER_QUAD * sizeof(GLuint);
+}
+
+/// Writes a u32 index list that draws `count` consecutive vertices, starting at vertex `first`,
+/// as triangles. `count` is the number of source vertices (4 per quad), not the number of
+/// generated indices. Returns the offset of the list inside the buffer cache.
+GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
+    const std::size_t size{CalculateQuadSize(count)};
+    auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(size);
+
+    for (u32 primitive = 0; primitive < count / 4; ++primitive) {
+        for (u32 i = 0; i < TRIANGLES_PER_QUAD; ++i) {
+            const u32 index = first + primitive * 4 + QUAD_MAP[i];
+            std::memcpy(dst_pointer, &index, sizeof(index));
+            dst_pointer += sizeof(index);
+        }
+    }
+
+    return index_offset;
+}
+
+/// Re-orders `count` guest indices of `index_size` bytes each, read from `gpu_addr`, into a
+/// triangle list with the same index width. Returns the offset of the list inside the buffer
+/// cache.
+GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size,
+                                             u32 count) {
+    // CalculateQuadSize budgets sizeof(GLuint) bytes per index, wider indices would overrun the
+    // reservation made below.
+    ASSERT_MSG(index_size <= sizeof(GLuint), "Index size exceeds the reserved index stride");
+
+    const std::size_t map_size{CalculateQuadSize(count)};
+    auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
+
+    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
+    const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+    const u8* const source{cpu_addr ? Memory::GetPointer(*cpu_addr) : nullptr};
+    ASSERT_MSG(source != nullptr, "Quad index buffer is not mapped");
+    if (source == nullptr) {
+        // Never dereference an unmapped guest address: emit degenerate (all zero) indices so the
+        // reserved range still holds defined data.
+        std::memset(dst_pointer, 0, map_size);
+        return index_offset;
+    }
+
+    for (u32 primitive = 0; primitive < count / 4; ++primitive) {
+        for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
+            const u32 index = primitive * 4 + QUAD_MAP[i];
+            const u8* src_offset = source + (index * index_size);
+
+            std::memcpy(dst_pointer, src_offset, index_size);
+            dst_pointer += index_size;
+        }
+    }
+
+    return index_offset;
+}
+
+} // namespace OpenGL
\ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h
new file mode 100644
index 000000000..a8cb88eb5
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h
@@ -0,0 +1,33 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+#include <glad/glad.h>
+
+#include "common/common_types.h"
+#include "video_core/memory_manager.h"
+
+namespace OpenGL {
+
+class OGLBufferCache;
+
+/// Builds triangle index lists for quad draws inside the buffer cache passed at construction.
+class PrimitiveAssembler {
+public:
+    explicit PrimitiveAssembler(OGLBufferCache& buffer_cache);
+    ~PrimitiveAssembler();
+
+    /// Calculates the size required by MakeQuadArray and MakeQuadIndexed.
+    /// @param count Number of source vertices or indices (4 per quad).
+    std::size_t CalculateQuadSize(u32 count) const;
+
+    /// Generates a u32 triangle index list for a non-indexed quad draw.
+    /// @param first Index of the first vertex of the draw.
+    /// @param count Number of source vertices (4 per quad), not the number of generated indices.
+    /// @returns Offset of the generated index list inside the buffer cache.
+    GLintptr MakeQuadArray(u32 first, u32 count);
+
+    /// Generates a triangle index list for an indexed quad draw, keeping the index width.
+    /// @param gpu_addr Guest GPU address of the first index to read.
+    /// @param index_size Size in bytes of each guest index.
+    /// @param count Number of source indices (4 per quad).
+    /// @returns Offset of the generated index list inside the buffer cache.
+    GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count);
+
+private:
+    OGLBufferCache& buffer_cache;
+};
+
+} // namespace OpenGL
\ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 587d9dffb..209bdf181 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -42,6 +42,41 @@ MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(12
 MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
+MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
+
+/// Everything needed to issue one GL draw call, gathered by RasterizerOpenGL::SetupDraw.
+struct DrawParameters {
+    GLenum primitive_mode;   ///< GL primitive type passed to the draw call
+    GLsizei count;           ///< Number of indices (indexed) or vertices (arrays) to draw
+    GLint current_instance;  ///< Base instance; values above zero select the instanced calls
+    bool use_indexed;        ///< True to draw with glDrawElements*, false for glDrawArrays*
+
+    GLint vertex_first;      ///< First vertex, only read by non-indexed draws
+
+    GLenum index_format;           ///< GL type of the indices, only read by indexed draws
+    GLint base_vertex;             ///< Value added to every index, only read by indexed draws
+    GLintptr index_buffer_offset;  ///< Offset of the indices inside the bound buffer
+
+    /// Issues the GL draw call described by this structure.
+    void DispatchDraw() const {
+        if (use_indexed) {
+            const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset);
+            if (current_instance > 0) {
+                glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format,
+                                                              index_buffer_ptr, 1, base_vertex,
+                                                              current_instance);
+            } else {
+                glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr,
+                                         base_vertex);
+            }
+        } else {
+            if (current_instance > 0) {
+                glDrawArraysInstancedBaseInstance(primitive_mode, vertex_first, count, 1,
+                                                  current_instance);
+            } else {
+                glDrawArrays(primitive_mode, vertex_first, count);
+            }
+        }
+    }
+};
 
 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
     : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
@@ -172,6 +207,54 @@ void RasterizerOpenGL::SetupVertexArrays() {
     }
 }
 
+/// Builds the draw parameters for the current draw and uploads or generates its index data.
+/// Quads are converted into an indexed GL_TRIANGLES draw through the primitive assembler.
+DrawParameters RasterizerOpenGL::SetupDraw() {
+    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    const auto& regs = gpu.regs;
+    const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
+
+    DrawParameters params{};
+    params.current_instance = gpu.state.current_instance;
+
+    if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
+        MICROPROFILE_SCOPE(OpenGL_PrimitiveAssembly);
+
+        params.use_indexed = true;
+        params.primitive_mode = GL_TRIANGLES;
+
+        if (is_indexed) {
+            params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
+            params.count = (regs.index_array.count / 4) * 6;
+            params.index_buffer_offset = primitive_assembler.MakeQuadIndexed(
+                regs.index_array.IndexStart(), regs.index_array.FormatSizeInBytes(),
+                regs.index_array.count);
+            params.base_vertex = static_cast<GLint>(regs.vb_element_base);
+        } else {
+            // MakeQuadArray always generates u32 indexes
+            params.index_format = GL_UNSIGNED_INT;
+            params.count = (regs.vertex_buffer.count / 4) * 6;
+            // MakeQuadArray takes the number of source vertices (4 per quad), not the expanded
+            // index count stored in params.count; passing the latter generates 1.5x the quads
+            // and overruns the space reserved in DrawArrays.
+            params.index_buffer_offset = primitive_assembler.MakeQuadArray(
+                regs.vertex_buffer.first, regs.vertex_buffer.count);
+        }
+        return params;
+    }
+
+    params.use_indexed = is_indexed;
+    params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
+
+    if (is_indexed) {
+        MICROPROFILE_SCOPE(OpenGL_Index);
+        params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
+        params.count = regs.index_array.count;
+        params.index_buffer_offset =
+            buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize());
+        params.base_vertex = static_cast<GLint>(regs.vb_element_base);
+    } else {
+        params.count = regs.vertex_buffer.count;
+        params.vertex_first = regs.vertex_buffer.first;
+    }
+    return params;
+}
+
 void RasterizerOpenGL::SetupShaders() {
     MICROPROFILE_SCOPE(OpenGL_Shader);
     const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
@@ -256,6 +339,13 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
     return size;
 }
 
+/// Returns the size in bytes of the guest index buffer selected by the index_array registers.
+std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+
+    return static_cast<std::size_t>(regs.index_array.count) *
+           static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
+}
+
 bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
     accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
     DrawArrays();
@@ -459,16 +549,23 @@ void RasterizerOpenGL::DrawArrays() {
     // Draw the vertex batch
     const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
-    const u64 index_buffer_size{static_cast<u64>(regs.index_array.count) *
-                                static_cast<u64>(regs.index_array.FormatSizeInBytes())};
 
     state.draw.vertex_buffer = buffer_cache.GetHandle();
     state.Apply();
 
     std::size_t buffer_size = CalculateVertexArraysSize();
 
-    if (is_indexed) {
-        buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size;
+    // Add space for index buffer (keeping in mind non-core primitives)
+    switch (regs.draw.topology) {
+    case Maxwell::PrimitiveTopology::Quads:
+        // Indexed quads are expanded from index_array.count source indices, non-indexed quads
+        // from vertex_buffer.count source vertices; reserve for the count that is really used.
+        buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) +
+                      primitive_assembler.CalculateQuadSize(
+                          is_indexed ? regs.index_array.count : regs.vertex_buffer.count);
+        break;
+    default:
+        if (is_indexed) {
+            buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + CalculateIndexBufferSize();
+        }
+        break;
     }
 
     // Uniform space for the 5 shader stages
@@ -482,20 +579,7 @@ void RasterizerOpenGL::DrawArrays() {
     buffer_cache.Map(buffer_size);
 
     SetupVertexArrays();
-
-    // If indexed mode, copy the index buffer
-    GLintptr index_buffer_offset = 0;
-    if (is_indexed) {
-        MICROPROFILE_SCOPE(OpenGL_Index);
-
-        // Adjust the index buffer offset so it points to the first desired index.
- auto index_start = regs.index_array.StartAddress(); - index_start += static_cast<size_t>(regs.index_array.first) * - static_cast<size_t>(regs.index_array.FormatSizeInBytes()); - - index_buffer_offset = buffer_cache.UploadMemory(index_start, index_buffer_size); - } - + DrawParameters params = SetupDraw(); SetupShaders(); buffer_cache.Unmap(); @@ -503,31 +587,8 @@ void RasterizerOpenGL::DrawArrays() { shader_program_manager->ApplyTo(state); state.Apply(); - const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)}; - if (is_indexed) { - const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)}; - - if (gpu.state.current_instance > 0) { - glDrawElementsInstancedBaseVertexBaseInstance( - primitive_mode, regs.index_array.count, - MaxwellToGL::IndexFormat(regs.index_array.format), - reinterpret_cast<const void*>(index_buffer_offset), 1, base_vertex, - gpu.state.current_instance); - } else { - glDrawElementsBaseVertex(primitive_mode, regs.index_array.count, - MaxwellToGL::IndexFormat(regs.index_array.format), - reinterpret_cast<const void*>(index_buffer_offset), - base_vertex); - } - } else { - if (gpu.state.current_instance > 0) { - glDrawArraysInstancedBaseInstance(primitive_mode, regs.vertex_buffer.first, - regs.vertex_buffer.count, 1, - gpu.state.current_instance); - } else { - glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count); - } - } + // Execute draw call + params.DispatchDraw(); // Disable scissor test state.scissor.enabled = false; @@ -556,14 +617,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { InvalidateRegion(addr, size); } -bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { +bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, + const Tegra::Engines::Fermi2D::Regs::Surface& dst) { MICROPROFILE_SCOPE(OpenGL_Blits); - UNREACHABLE(); - return true; -} - -bool RasterizerOpenGL::AccelerateTextureCopy(const void* 
config) { - UNREACHABLE(); + res_cache.FermiCopySurface(src, dst); return true; } @@ -601,10 +658,13 @@ void RasterizerOpenGL::SamplerInfo::Create() { sampler.Create(); mag_filter = min_filter = Tegra::Texture::TextureFilter::Linear; wrap_u = wrap_v = wrap_p = Tegra::Texture::WrapMode::Wrap; + uses_depth_compare = false; + depth_compare_func = Tegra::Texture::DepthCompareFunc::Never; // default is GL_LINEAR_MIPMAP_LINEAR glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); // Other attributes have correct defaults + glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); } void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) { @@ -632,6 +692,21 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr glSamplerParameteri(s, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); } + if (uses_depth_compare != (config.depth_compare_enabled == 1)) { + uses_depth_compare = (config.depth_compare_enabled == 1); + if (uses_depth_compare) { + glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); + } else { + glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_NONE); + } + } + + if (depth_compare_func != config.depth_compare_func) { + depth_compare_func = config.depth_compare_func; + glSamplerParameteri(s, GL_TEXTURE_COMPARE_FUNC, + MaxwellToGL::DepthCompareFunc(depth_compare_func)); + } + if (wrap_u == Tegra::Texture::WrapMode::Border || wrap_v == Tegra::Texture::WrapMode::Border || wrap_p == Tegra::Texture::WrapMode::Border) { const GLvec4 new_border_color = {{config.border_color_r, config.border_color_g, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4c8ecbd1c..0dab2018b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -23,6 +23,7 @@ #include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" #include 
"video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_primitive_assembler.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -38,6 +39,7 @@ class EmuWindow; namespace OpenGL { struct ScreenInfo; +struct DrawParameters; class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: @@ -50,8 +52,8 @@ public: void FlushRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; - bool AccelerateDisplayTransfer(const void* config) override; - bool AccelerateTextureCopy(const void* config) override; + bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, + const Tegra::Engines::Fermi2D::Regs::Surface& dst) override; bool AccelerateFill(const void* config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; @@ -94,6 +96,8 @@ private: Tegra::Texture::WrapMode wrap_u; Tegra::Texture::WrapMode wrap_v; Tegra::Texture::WrapMode wrap_p; + bool uses_depth_compare; + Tegra::Texture::DepthCompareFunc depth_compare_func; GLvec4 border_color; }; @@ -192,12 +196,17 @@ private: static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; OGLFramebuffer framebuffer; + PrimitiveAssembler primitive_assembler{buffer_cache}; GLint uniform_buffer_alignment; std::size_t CalculateVertexArraysSize() const; + std::size_t CalculateIndexBufferSize() const; + void SetupVertexArrays(); + DrawParameters SetupDraw(); + void SetupShaders(); enum class AccelDraw { Disabled, Arrays, Indexed }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index ce967c4d6..56ff83eff 100644 --- 
a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -143,6 +143,28 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { return params; } +/*static*/ SurfaceParams SurfaceParams::CreateForFermiCopySurface( + const Tegra::Engines::Fermi2D::Regs::Surface& config) { + SurfaceParams params{}; + params.addr = TryGetCpuAddr(config.Address()); + params.is_tiled = !config.linear; + params.block_height = params.is_tiled ? config.BlockHeight() : 0, + params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); + params.component_type = ComponentTypeFromRenderTarget(config.format); + params.type = GetFormatType(params.pixel_format); + params.width = config.width; + params.height = config.height; + params.unaligned_height = config.height; + params.target = SurfaceTarget::Texture2D; + params.depth = 1; + params.size_in_bytes_total = params.SizeInBytesTotal(); + params.size_in_bytes_2d = params.SizeInBytes2D(); + params.max_mip_level = 0; + params.rt = {}; + + return params; +} + static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S @@ -559,6 +581,18 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, return true; } +static void FastCopySurface(const Surface& src_surface, const Surface& dst_surface) { + const auto& src_params{src_surface->GetSurfaceParams()}; + const auto& dst_params{dst_surface->GetSurfaceParams()}; + + const u32 width{std::min(src_params.width, dst_params.width)}; + const u32 height{std::min(src_params.height, dst_params.height)}; + + glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0, + 0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0, + 0, 0, width, height, 1); +} + static void 
CopySurface(const Surface& src_surface, const Surface& dst_surface, GLuint copy_pbo_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { @@ -1033,6 +1067,26 @@ Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) { return surface; } +void RasterizerCacheOpenGL::FermiCopySurface( + const Tegra::Engines::Fermi2D::Regs::Surface& src_config, + const Tegra::Engines::Fermi2D::Regs::Surface& dst_config) { + + const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); + const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); + + ASSERT(src_params.width == dst_params.width); + ASSERT(src_params.height == dst_params.height); + ASSERT(src_params.pixel_format == dst_params.pixel_format); + ASSERT(src_params.block_height == dst_params.block_height); + ASSERT(src_params.is_tiled == dst_params.is_tiled); + ASSERT(src_params.depth == dst_params.depth); + ASSERT(src_params.depth == 1); // Currently, FastCopySurface only works with 2D surfaces + ASSERT(src_params.target == dst_params.target); + ASSERT(src_params.rt.index == dst_params.rt.index); + + FastCopySurface(GetSurface(src_params, true), GetSurface(dst_params, false)); +} + Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params) { // Verify surface is compatible for blitting @@ -1041,6 +1095,15 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, // Get a new surface with the new parameters, and blit the previous surface to it Surface new_surface{GetUncachedSurface(new_params)}; + // For compatible surfaces, we can just do fast glCopyImageSubData based copy + if (old_params.target == new_params.target && old_params.type == new_params.type && + old_params.depth == new_params.depth && old_params.depth == 1 && + SurfaceParams::GetFormatBpp(old_params.pixel_format) == + SurfaceParams::GetFormatBpp(new_params.pixel_format)) { + 
FastCopySurface(old_surface, new_surface); + return new_surface; + } + // If the format is the same, just do a framebuffer blit. This is significantly faster than // using PBOs. The is also likely less accurate, as textures will be converted rather than // reinterpreted. When use_accurate_framebuffers setting is enabled, perform a more accurate diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 49025a3fe..0b4940b3c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "common/hash.h" #include "common/math_util.h" +#include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -719,6 +720,10 @@ struct SurfaceParams { Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format); + /// Creates SurfaceParams for a Fermi2D surface copy + static SurfaceParams CreateForFermiCopySurface( + const Tegra::Engines::Fermi2D::Regs::Surface& config); + /// Checks if surfaces are compatible for caching bool IsCompatibleSurface(const SurfaceParams& other) const { return std::tie(pixel_format, type, width, height, target, depth) == @@ -837,6 +842,10 @@ public: /// Tries to find a framebuffer using on the provided CPU address Surface TryFindFramebufferSurface(VAddr addr) const; + /// Copies the contents of one surface to another + void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, + const Tegra::Engines::Fermi2D::Regs::Surface& dst_config); + private: void LoadSurface(const Surface& surface); Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 579a78702..7e57de78a 100644 --- 
a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -508,7 +508,7 @@ public: /// Returns the GLSL sampler used for the input shader sampler, and creates a new one if /// necessary. std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type, - bool is_array) { + bool is_array, bool is_shadow) { const std::size_t offset = static_cast<std::size_t>(sampler.index.Value()); // If this sampler has already been used, return the existing mapping. @@ -517,13 +517,14 @@ public: [&](const SamplerEntry& entry) { return entry.GetOffset() == offset; }); if (itr != used_samplers.end()) { - ASSERT(itr->GetType() == type && itr->IsArray() == is_array); + ASSERT(itr->GetType() == type && itr->IsArray() == is_array && + itr->IsShadow() == is_shadow); return itr->GetName(); } // Otherwise create a new mapping for this sampler const std::size_t next_index = used_samplers.size(); - const SamplerEntry entry{stage, offset, next_index, type, is_array}; + const SamplerEntry entry{stage, offset, next_index, type, is_array, is_shadow}; used_samplers.emplace_back(entry); return entry.GetName(); } @@ -747,8 +748,9 @@ private: } /// Generates code representing a texture sampler. 
- std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array) { - return regs.AccessSampler(sampler, type, is_array); + std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array, + bool is_shadow) { + return regs.AccessSampler(sampler, type, is_array, is_shadow); } /** @@ -1002,6 +1004,24 @@ private: shader.AddLine('}'); } + static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { + switch (texture_type) { + case Tegra::Shader::TextureType::Texture1D: { + return 1; + } + case Tegra::Shader::TextureType::Texture2D: { + return 2; + } + case Tegra::Shader::TextureType::TextureCube: { + return 3; + } + default: + LOG_CRITICAL(HW_GPU, "Unhandled texture type {}", static_cast<u32>(texture_type)); + UNREACHABLE(); + return 0; + } + } + /* * Emits code to push the input target address to the SSY address stack, incrementing the stack * top. @@ -1896,24 +1916,35 @@ private: "NODEP is not implemented"); ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), "AOFFI is not implemented"); - ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), - "DC is not implemented"); - switch (texture_type) { - case Tegra::Shader::TextureType::Texture1D: { + const bool depth_compare = + instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); + u32 num_coordinates = TextureCoordinates(texture_type); + if (depth_compare) + num_coordinates += 1; + + switch (num_coordinates) { + case 1: { const std::string x = regs.GetRegisterAsFloat(instr.gpr8); coord = "float coords = " + x + ';'; break; } - case Tegra::Shader::TextureType::Texture2D: { + case 2: { const std::string x = regs.GetRegisterAsFloat(instr.gpr8); const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); coord = "vec2 coords = vec2(" + x + ", " + y + ");"; break; } + case 3: { + const std::string x = regs.GetRegisterAsFloat(instr.gpr8); + const std::string y = 
regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + const std::string z = regs.GetRegisterAsFloat(instr.gpr20); + coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; + break; + } default: - LOG_CRITICAL(HW_GPU, "Unhandled texture type {}", - static_cast<u32>(texture_type)); + LOG_CRITICAL(HW_GPU, "Unhandled coordinates number {}", + static_cast<u32>(num_coordinates)); UNREACHABLE(); // Fallback to interpreting as a 2D texture for now @@ -1924,9 +1955,10 @@ private: } // TODO: make sure coordinates are always indexed to gpr8 and gpr20 is always bias // or lod. - const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20); + std::string op_c; - const std::string sampler = GetSampler(instr.sampler, texture_type, false); + const std::string sampler = + GetSampler(instr.sampler, texture_type, false, depth_compare); // Add an extra scope and declare the texture coords inside to prevent // overwriting them in case they are used as outputs of the texs instruction. @@ -1935,7 +1967,7 @@ private: shader.AddLine(coord); std::string texture; - switch (instr.tex.process_mode) { + switch (instr.tex.GetTextureProcessMode()) { case Tegra::Shader::TextureProcessMode::None: { texture = "texture(" + sampler + ", coords)"; break; @@ -1946,12 +1978,22 @@ private: } case Tegra::Shader::TextureProcessMode::LB: case Tegra::Shader::TextureProcessMode::LBA: { + if (num_coordinates <= 2) { + op_c = regs.GetRegisterAsFloat(instr.gpr20); + } else { + op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); + } // TODO: Figure if A suffix changes the equation at all. texture = "texture(" + sampler + ", coords, " + op_c + ')'; break; } case Tegra::Shader::TextureProcessMode::LL: case Tegra::Shader::TextureProcessMode::LLA: { + if (num_coordinates <= 2) { + op_c = regs.GetRegisterAsFloat(instr.gpr20); + } else { + op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); + } // TODO: Figure if A suffix changes the equation at all. 
texture = "textureLod(" + sampler + ", coords, " + op_c + ')'; break; @@ -1959,18 +2001,22 @@ private: default: { texture = "texture(" + sampler + ", coords)"; LOG_CRITICAL(HW_GPU, "Unhandled texture process mode {}", - static_cast<u32>(instr.tex.process_mode.Value())); + static_cast<u32>(instr.tex.GetTextureProcessMode())); UNREACHABLE(); } } - std::size_t dest_elem{}; - for (std::size_t elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; + if (!depth_compare) { + std::size_t dest_elem{}; + for (std::size_t elem = 0; elem < 4; ++elem) { + if (!instr.tex.IsComponentEnabled(elem)) { + // Skip disabled components + continue; + } + regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); + ++dest_elem; } - regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); - ++dest_elem; + } else { + regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); } --shader.scope; shader.AddLine("}"); @@ -1983,11 +2029,15 @@ private: ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), "NODEP is not implemented"); - ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), - "DC is not implemented"); - switch (texture_type) { - case Tegra::Shader::TextureType::Texture2D: { + const bool depth_compare = + instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); + u32 num_coordinates = TextureCoordinates(texture_type); + if (depth_compare) + num_coordinates += 1; + + switch (num_coordinates) { + case 2: { if (is_array) { const std::string index = regs.GetRegisterAsInteger(instr.gpr8); const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); @@ -2000,17 +2050,25 @@ private: } break; } - case Tegra::Shader::TextureType::TextureCube: { - ASSERT_MSG(!is_array, "Unimplemented"); - std::string x = regs.GetRegisterAsFloat(instr.gpr8); - std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - std::string z = 
regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; + case 3: { + if (is_array) { + UNIMPLEMENTED_MSG("3-coordinate arrays not fully implemented"); + const std::string x = regs.GetRegisterAsFloat(instr.gpr8); + const std::string y = regs.GetRegisterAsFloat(instr.gpr20); + coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + texture_type = Tegra::Shader::TextureType::Texture2D; + is_array = false; + } else { + const std::string x = regs.GetRegisterAsFloat(instr.gpr8); + const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + const std::string z = regs.GetRegisterAsFloat(instr.gpr20); + coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; + } break; } default: - LOG_CRITICAL(HW_GPU, "Unhandled texture type {}", - static_cast<u32>(texture_type)); + LOG_CRITICAL(HW_GPU, "Unhandled coordinates number {}", + static_cast<u32>(num_coordinates)); UNREACHABLE(); // Fallback to interpreting as a 2D texture for now @@ -2020,9 +2078,35 @@ private: texture_type = Tegra::Shader::TextureType::Texture2D; is_array = false; } - const std::string sampler = GetSampler(instr.sampler, texture_type, is_array); - const std::string texture = "texture(" + sampler + ", coords)"; - WriteTexsInstruction(instr, coord, texture); + const std::string sampler = + GetSampler(instr.sampler, texture_type, is_array, depth_compare); + std::string texture; + switch (instr.texs.GetTextureProcessMode()) { + case Tegra::Shader::TextureProcessMode::None: { + texture = "texture(" + sampler + ", coords)"; + break; + } + case Tegra::Shader::TextureProcessMode::LZ: { + texture = "textureLod(" + sampler + ", coords, 0.0)"; + break; + } + case Tegra::Shader::TextureProcessMode::LL: { + const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); + texture = "textureLod(" + sampler + ", coords, " + op_c + ')'; + break; + } + default: { + texture = "texture(" + sampler + ", coords)"; + LOG_CRITICAL(HW_GPU, "Unhandled 
texture process mode {}", + static_cast<u32>(instr.texs.GetTextureProcessMode())); + UNREACHABLE(); + } + } + if (!depth_compare) { + WriteTexsInstruction(instr, coord, texture); + } else { + WriteTexsInstruction(instr, coord, "vec4(" + texture + ')'); + } break; } case OpCode::Id::TLDS: { @@ -2062,9 +2146,26 @@ private: static_cast<u32>(texture_type)); UNREACHABLE(); } - - const std::string sampler = GetSampler(instr.sampler, texture_type, is_array); - const std::string texture = "texelFetch(" + sampler + ", coords, 0)"; + const std::string sampler = + GetSampler(instr.sampler, texture_type, is_array, false); + std::string texture = "texelFetch(" + sampler + ", coords, 0)"; + const std::string op_c = regs.GetRegisterAsInteger(instr.gpr20.Value() + 1); + switch (instr.tlds.GetTextureProcessMode()) { + case Tegra::Shader::TextureProcessMode::LZ: { + texture = "texelFetch(" + sampler + ", coords, 0)"; + break; + } + case Tegra::Shader::TextureProcessMode::LL: { + texture = "texelFetch(" + sampler + ", coords, " + op_c + ')'; + break; + } + default: { + texture = "texelFetch(" + sampler + ", coords, 0)"; + LOG_CRITICAL(HW_GPU, "Unhandled texture process mode {}", + static_cast<u32>(instr.tlds.GetTextureProcessMode())); + UNREACHABLE(); + } + } WriteTexsInstruction(instr, coord, texture); break; } @@ -2077,28 +2178,43 @@ private: "NODEP is not implemented"); ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), "AOFFI is not implemented"); - ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), - "DC is not implemented"); ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), "NDV is not implemented"); ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP), "PTP is not implemented"); - - switch (instr.tld4.texture_type) { - case Tegra::Shader::TextureType::Texture2D: { + const bool depth_compare = + instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); + auto texture_type = 
instr.tld4.texture_type.Value(); + u32 num_coordinates = TextureCoordinates(texture_type); + if (depth_compare) + num_coordinates += 1; + + switch (num_coordinates) { + case 2: { const std::string x = regs.GetRegisterAsFloat(instr.gpr8); const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); coord = "vec2 coords = vec2(" + x + ", " + y + ");"; break; } + case 3: { + const std::string x = regs.GetRegisterAsFloat(instr.gpr8); + const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); + coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; + break; + } default: - LOG_CRITICAL(HW_GPU, "Unhandled texture type {}", - static_cast<u32>(instr.tld4.texture_type.Value())); + LOG_CRITICAL(HW_GPU, "Unhandled coordinates number {}", + static_cast<u32>(num_coordinates)); UNREACHABLE(); + const std::string x = regs.GetRegisterAsFloat(instr.gpr8); + const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + texture_type = Tegra::Shader::TextureType::Texture2D; } const std::string sampler = - GetSampler(instr.sampler, instr.tld4.texture_type, false); + GetSampler(instr.sampler, texture_type, false, depth_compare); // Add an extra scope and declare the texture coords inside to prevent // overwriting them in case they are used as outputs of the texs instruction. 
shader.AddLine("{"); @@ -2106,15 +2222,18 @@ private: shader.AddLine(coord); const std::string texture = "textureGather(" + sampler + ", coords, " + std::to_string(instr.tld4.component) + ')'; - - std::size_t dest_elem{}; - for (std::size_t elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; + if (!depth_compare) { + std::size_t dest_elem{}; + for (std::size_t elem = 0; elem < 4; ++elem) { + if (!instr.tex.IsComponentEnabled(elem)) { + // Skip disabled components + continue; + } + regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); + ++dest_elem; } - regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); - ++dest_elem; + } else { + regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); } --shader.scope; shader.AddLine("}"); @@ -2125,18 +2244,30 @@ private: "NODEP is not implemented"); ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), "AOFFI is not implemented"); - ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), - "DC is not implemented"); + const bool depth_compare = + instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. 
- const std::string sampler = - GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false); - const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; + const std::string sampler = GetSampler( + instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare); + std::string coord; + if (!depth_compare) { + coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; + } else { + // Note: TLD4S coordinate encoding works just like TEXS's + const std::string op_c = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + coord = "vec3 coords = vec3(" + op_a + ", " + op_c + ", " + op_b + ");"; + } const std::string texture = "textureGather(" + sampler + ", coords, " + std::to_string(instr.tld4s.component) + ')'; - WriteTexsInstruction(instr, coord, texture); + + if (!depth_compare) { + WriteTexsInstruction(instr, coord, texture); + } else { + WriteTexsInstruction(instr, coord, "vec4(" + texture + ')'); + } break; } case OpCode::Id::TXQ: { @@ -2147,7 +2278,7 @@ private: // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const std::string sampler = - GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false); + GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); switch (instr.txq.query_type) { case Tegra::Shader::TextureQueryType::Dimension: { const std::string texture = "textureQueryLevels(" + sampler + ')'; @@ -2172,7 +2303,8 @@ private: const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); const bool is_array = instr.tmml.array != 0; auto texture_type = instr.tmml.texture_type.Value(); - const std::string sampler = GetSampler(instr.sampler, texture_type, is_array); + const std::string sampler = + GetSampler(instr.sampler, texture_type, is_array, false); // TODO: add coordinates for different samplers once other texture types are // implemented. 
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index d53b93ad5..e56f39e78 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -75,8 +75,9 @@ class SamplerEntry { public: SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index, - Tegra::Shader::TextureType type, bool is_array) - : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {} + Tegra::Shader::TextureType type, bool is_array, bool is_shadow) + : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array), + is_shadow(is_shadow) {} std::size_t GetOffset() const { return offset; @@ -117,6 +118,8 @@ public: } if (is_array) glsl_type += "Array"; + if (is_shadow) + glsl_type += "Shadow"; return glsl_type; } @@ -128,6 +131,10 @@ public: return is_array; } + bool IsShadow() const { + return is_shadow; + } + u32 GetHash() const { return (static_cast<u32>(stage) << 16) | static_cast<u32>(sampler_index); } @@ -147,7 +154,8 @@ private: Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used. std::size_t sampler_index; ///< Value used to index into the generated GLSL sampler array. Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc) - bool is_array; ///< Whether the texture is being sampled as an array texture or not. + bool is_array; ///< Whether the texture is being sampled as an array texture or not. + bool is_shadow; ///< Whether the texture is being sampled as a depth texture or not. 
}; struct ShaderEntries { diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 67273e164..3c3bcaae4 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -159,6 +159,31 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { return {}; } +inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) { + switch (func) { + case Tegra::Texture::DepthCompareFunc::Never: + return GL_NEVER; + case Tegra::Texture::DepthCompareFunc::Less: + return GL_LESS; + case Tegra::Texture::DepthCompareFunc::LessEqual: + return GL_LEQUAL; + case Tegra::Texture::DepthCompareFunc::Equal: + return GL_EQUAL; + case Tegra::Texture::DepthCompareFunc::NotEqual: + return GL_NOTEQUAL; + case Tegra::Texture::DepthCompareFunc::Greater: + return GL_GREATER; + case Tegra::Texture::DepthCompareFunc::GreaterEqual: + return GL_GEQUAL; + case Tegra::Texture::DepthCompareFunc::Always: + return GL_ALWAYS; + } + LOG_CRITICAL(Render_OpenGL, "Unimplemented texture depth compare function ={}", + static_cast<u32>(func)); + UNREACHABLE(); + return {}; +} + inline GLenum BlendEquation(Maxwell::Blend::Equation equation) { switch (equation) { case Maxwell::Blend::Equation::Add: diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 14aea4838..8f31d825a 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -227,6 +227,17 @@ enum class WrapMode : u32 { MirrorOnceClampOGL = 7, }; +enum class DepthCompareFunc : u32 { + Never = 0, + Less = 1, + Equal = 2, + LessEqual = 3, + Greater = 4, + NotEqual = 5, + GreaterEqual = 6, + Always = 7, +}; + enum class TextureFilter : u32 { Nearest = 1, Linear = 2, @@ -244,7 +255,7 @@ struct TSCEntry { BitField<3, 3, WrapMode> wrap_v; BitField<6, 3, WrapMode> wrap_p; BitField<9, 1, u32> depth_compare_enabled; - BitField<10, 3, u32> depth_compare_func; + BitField<10, 3, 
DepthCompareFunc> depth_compare_func; }; union { BitField<0, 2, TextureFilter> mag_filter; |