Diffstat (limited to 'src/video_core')
24 files changed, 126 insertions, 112 deletions
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index f8aa4ff55..082a40cd9 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <cstring>
+
 #include "common/assert.h"
 #include "video_core/engines/engine_upload.h"
 #include "video_core/memory_manager.h"
@@ -10,7 +12,9 @@ namespace Tegra::Engines::Upload {
 
 State::State(MemoryManager& memory_manager, Registers& regs)
-    : memory_manager(memory_manager), regs(regs) {}
+    : regs{regs}, memory_manager{memory_manager} {}
+
+State::~State() = default;
 
 void State::ProcessExec(const bool is_linear) {
     write_offset = 0;
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index 9c6e0d21c..ef4f5839a 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -4,10 +4,8 @@
 
 #pragma once
 
-#include <cstddef>
 #include <vector>
 
 #include "common/bit_field.h"
-#include "common/common_funcs.h"
 #include "common/common_types.h"
 
 namespace Tegra {
@@ -57,10 +55,10 @@ struct Registers {
 class State {
 public:
     State(MemoryManager& memory_manager, Registers& regs);
-    ~State() = default;
+    ~State();
 
-    void ProcessExec(const bool is_linear);
-    void ProcessData(const u32 data, const bool is_last_call);
+    void ProcessExec(bool is_linear);
+    void ProcessData(u32 data, bool is_last_call);
 
 private:
     u32 write_offset = 0;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index d7b586db9..39968d403 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -34,9 +34,9 @@ void Maxwell3D::InitializeRegisterDefaults() {
     // Depth range near/far is not always set, but is expected to be the default 0.0f, 1.0f. This is
     // needed for ARMS.
-    for (std::size_t viewport{}; viewport < Regs::NumViewports; ++viewport) {
-        regs.viewports[viewport].depth_range_near = 0.0f;
-        regs.viewports[viewport].depth_range_far = 1.0f;
+    for (auto& viewport : regs.viewports) {
+        viewport.depth_range_near = 0.0f;
+        viewport.depth_range_far = 1.0f;
     }
 
     // Doom and Bomberman seems to use the uninitialized registers and just enable blend
@@ -47,13 +47,13 @@ void Maxwell3D::InitializeRegisterDefaults() {
     regs.blend.equation_a = Regs::Blend::Equation::Add;
     regs.blend.factor_source_a = Regs::Blend::Factor::One;
     regs.blend.factor_dest_a = Regs::Blend::Factor::Zero;
-    for (std::size_t blend_index = 0; blend_index < Regs::NumRenderTargets; blend_index++) {
-        regs.independent_blend[blend_index].equation_rgb = Regs::Blend::Equation::Add;
-        regs.independent_blend[blend_index].factor_source_rgb = Regs::Blend::Factor::One;
-        regs.independent_blend[blend_index].factor_dest_rgb = Regs::Blend::Factor::Zero;
-        regs.independent_blend[blend_index].equation_a = Regs::Blend::Equation::Add;
-        regs.independent_blend[blend_index].factor_source_a = Regs::Blend::Factor::One;
-        regs.independent_blend[blend_index].factor_dest_a = Regs::Blend::Factor::Zero;
+    for (auto& blend : regs.independent_blend) {
+        blend.equation_rgb = Regs::Blend::Equation::Add;
+        blend.factor_source_rgb = Regs::Blend::Factor::One;
+        blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
+        blend.equation_a = Regs::Blend::Equation::Add;
+        blend.factor_source_a = Regs::Blend::Factor::One;
+        blend.factor_dest_a = Regs::Blend::Factor::Zero;
     }
     regs.stencil_front_op_fail = Regs::StencilOp::Keep;
     regs.stencil_front_op_zfail = Regs::StencilOp::Keep;
@@ -75,11 +75,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
 
     // TODO(bunnei): Some games do not initialize the color masks (e.g. Sonic Mania). Assuming a
     // default of enabled fixes rendering here.
-    for (std::size_t color_mask = 0; color_mask < Regs::NumRenderTargets; color_mask++) {
-        regs.color_mask[color_mask].R.Assign(1);
-        regs.color_mask[color_mask].G.Assign(1);
-        regs.color_mask[color_mask].B.Assign(1);
-        regs.color_mask[color_mask].A.Assign(1);
+    for (auto& color_mask : regs.color_mask) {
+        color_mask.R.Assign(1);
+        color_mask.G.Assign(1);
+        color_mask.B.Assign(1);
+        color_mask.A.Assign(1);
     }
 
     // Commercial games seem to assume this value is enabled and nouveau sets this value manually.
@@ -178,13 +178,13 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
 
     // Vertex buffer
     if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
-        method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
+        method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
         dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
     } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
-               method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
+               method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
         dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
     } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
-               method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
+               method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
         dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
     }
 }
@@ -442,7 +442,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
     const auto a_type = tic_entry.a_type.Value();
 
     // TODO(Subv): Different data types for separate components are not supported
-    ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
+    DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
 
     return tic_entry;
 }
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 4883b582a..48e4fec33 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <bitset>
+#include <type_traits>
 #include <unordered_map>
 #include <vector>
 
@@ -1107,6 +1108,7 @@ public:
     } regs{};
 
     static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
+    static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
 
     struct State {
         struct ConstBufferInfo {
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index c9a2077de..1e2ff46b0 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -44,7 +44,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
             renderer.Rasterizer().FlushRegion(data->addr, data->size);
         } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
             renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
-        } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
+        } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
             return;
         } else {
             UNREACHABLE();
@@ -118,7 +118,7 @@ void SynchState::WaitForSynchronization(u64 fence) {
     // Wait for the GPU to be idle (all commands to be executed)
     {
         MICROPROFILE_SCOPE(GPU_wait);
-        std::unique_lock<std::mutex> lock{synchronization_mutex};
+        std::unique_lock lock{synchronization_mutex};
         synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
     }
 }
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index cc14527c7..cdf86f562 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -109,7 +109,7 @@ struct SynchState final {
 
     void TrySynchronize() {
         if (IsSynchronized()) {
-            std::lock_guard<std::mutex> lock{synchronization_mutex};
+            std::lock_guard lock{synchronization_mutex};
             synchronization_condition.notify_one();
         }
     }
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 524d9ea5a..fbea107ca 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -118,10 +118,10 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
                         static_cast<u32>(opcode.operation.Value()));
     }
 
-    if (opcode.is_exit) {
+    // An instruction with the Exit flag will not actually
+    // cause an exit if it's executed inside a delay slot.
+    if (opcode.is_exit && !is_delay_slot) {
         // Exit has a delay slot, execute the next instruction
-        // Note: Executing an exit during a branch delay slot will cause the instruction at the
-        // branch target to be executed before exiting.
         Step(offset, true);
         return false;
     }
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 6c98c6701..5d8d126c1 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -25,6 +25,8 @@ MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : raste
     UpdatePageTableForVMA(initial_vma);
 }
 
+MemoryManager::~MemoryManager() = default;
+
 GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
     const u64 aligned_size{Common::AlignUp(size, page_size)};
     const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
@@ -199,11 +201,11 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
     return {};
 }
 
-bool MemoryManager::IsBlockContinous(const GPUVAddr start, const std::size_t size) {
+bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const {
     const GPUVAddr end = start + size;
     const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start));
     const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end));
-    const std::size_t range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
+    const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
     return range == size;
 }
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index e4f0c4bd6..113f9d8f3 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -47,7 +47,8 @@ struct VirtualMemoryArea {
 
 class MemoryManager final {
 public:
-    MemoryManager(VideoCore::RasterizerInterface& rasterizer);
+    explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer);
+    ~MemoryManager();
 
     GPUVAddr AllocateSpace(u64 size, u64 align);
     GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
@@ -65,18 +66,18 @@ public:
     u8* GetPointer(GPUVAddr addr);
     const u8* GetPointer(GPUVAddr addr) const;
 
-    // Returns true if the block is continous in host memory, false otherwise
-    bool IsBlockContinous(const GPUVAddr start, const std::size_t size);
+    /// Returns true if the block is continuous in host memory, false otherwise
+    bool IsBlockContinuous(GPUVAddr start, std::size_t size) const;
 
     /**
      * ReadBlock and WriteBlock are full read and write operations over virtual
-     * GPU Memory. It's important to use these when GPU memory may not be continous
+     * GPU Memory. It's important to use these when GPU memory may not be continuous
     * in the Host Memory counterpart. Note: This functions cause Host GPU Memory
     * Flushes and Invalidations, respectively to each operation.
     */
-    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
-    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
-    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
+    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
 
     /**
      * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
@@ -88,9 +89,9 @@ public:
     * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
     * being flushed.
     */
-    void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
-    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
-    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
+    void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
 
 private:
     using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
@@ -111,10 +112,10 @@ private:
     /**
     * Maps an unmanaged host memory pointer at a given address.
     *
-    * @param target The guest address to start the mapping at.
-    * @param memory The memory to be mapped.
-    * @param size   Size of the mapping.
-    * @param state  MemoryState tag to attach to the VMA.
+    * @param target The guest address to start the mapping at.
+    * @param memory The memory to be mapped.
+    * @param size Size of the mapping in bytes.
+    * @param backing_addr The base address of the range to back this mapping.
     */
     VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
 
@@ -124,7 +125,7 @@ private:
     /// Converts a VMAHandle to a mutable VMAIter.
     VMAIter StripIterConstness(const VMAHandle& iter);
 
-    /// Marks as the specfied VMA as allocated.
+    /// Marks as the specified VMA as allocated.
     VMAIter Allocate(VMAIter vma);
 
     /**
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index 291772186..f820f3ed9 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -37,9 +37,6 @@ public:
     /// Gets the size of the shader in guest memory, required for cache management
     virtual std::size_t GetSizeInBytes() const = 0;
 
-    /// Wriets any cached resources back to memory
-    virtual void Flush() = 0;
-
     /// Sets whether the cached object should be considered registered
     void SetIsRegistered(bool registered) {
         is_registered = registered;
@@ -158,6 +155,8 @@ protected:
         return ++modified_ticks;
     }
 
+    virtual void FlushObjectInner(const T& object) = 0;
+
     /// Flushes the specified object, updating appropriate cache state as needed
     void FlushObject(const T& object) {
         std::lock_guard lock{mutex};
@@ -165,7 +164,7 @@ protected:
         if (!object->IsDirty()) {
             return;
         }
-        object->Flush();
+        FlushObjectInner(object);
         object->MarkAsModified(false, *this);
     }
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index fc33aa433..f9247a40e 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -42,9 +42,6 @@ public:
         return alignment;
     }
 
-    // We do not have to flush this cache as things in it are never modified by us.
-    void Flush() override {}
-
 private:
     VAddr cpu_addr{};
     std::size_t size{};
@@ -75,6 +72,9 @@ public:
 protected:
     void AlignBuffer(std::size_t alignment);
 
+    // We do not have to flush this cache as things in it are never modified by us.
+    void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
+
 private:
     OGLStreamBuffer stream_buffer;
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 196e6e278..2d467a240 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -46,7 +46,7 @@ public:
     /// Reloads the global region from guest memory
     void Reload(u32 size_);
 
-    void Flush() override;
+    void Flush();
 
 private:
     VAddr cpu_addr{};
@@ -65,6 +65,11 @@ public:
     GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
                                  Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
 
+protected:
+    void FlushObjectInner(const GlobalRegion& object) override {
+        object->Flush();
+    }
+
 private:
     GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
     GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 3cc945235..dbd8049f5 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -261,8 +261,8 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
         // MakeQuadArray always generates u32 indexes
         params.index_format = GL_UNSIGNED_INT;
         params.count = (regs.vertex_buffer.count / 4) * 6;
-        params.index_buffer_offset =
-            primitive_assembler.MakeQuadArray(regs.vertex_buffer.first, params.count);
+        params.index_buffer_offset = primitive_assembler.MakeQuadArray(
+            regs.vertex_buffer.first, regs.vertex_buffer.count);
     }
     return params;
 }
@@ -1135,7 +1135,9 @@ void RasterizerOpenGL::SyncTransformFeedback() {
 
 void RasterizerOpenGL::SyncPointState() {
     const auto& regs = system.GPU().Maxwell3D().regs;
-    state.point.size = regs.point_size;
+    // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
+    // in OpenGL).
+    state.point.size = std::max(1.0f, regs.point_size);
 }
 
 void RasterizerOpenGL::SyncPolygonOffset() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 5a25f5b37..a7681902e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -628,9 +628,11 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
 }
 
 MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
-void CachedSurface::LoadGLBuffer() {
+void CachedSurface::LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
     MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
-    gl_buffer.resize(params.max_mip_level);
+    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
+    if (gl_buffer.size() < params.max_mip_level)
+        gl_buffer.resize(params.max_mip_level);
     for (u32 i = 0; i < params.max_mip_level; i++)
         gl_buffer[i].resize(params.GetMipmapSizeGL(i));
     if (params.is_tiled) {
@@ -671,13 +673,13 @@ void CachedSurface::LoadGLBuffer() {
 }
 
 MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
-void CachedSurface::FlushGLBuffer() {
+void CachedSurface::FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
     MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
 
     ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");
 
+    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
     // OpenGL temporary buffer needs to be big enough to store raw texture size
-    gl_buffer.resize(1);
     gl_buffer[0].resize(GetSizeInBytes());
 
     const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
@@ -713,10 +715,12 @@ void CachedSurface::FlushGLBuffer() {
     }
 }
 
-void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
-                                          GLuint draw_fb_handle) {
+void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
+                                          GLuint read_fb_handle, GLuint draw_fb_handle) {
     const auto& rect{params.GetRect(mip_map)};
 
+    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
+
     // Load data from memory to the surface
     const auto x0 = static_cast<GLint>(rect.left);
     const auto y0 = static_cast<GLint>(rect.bottom);
@@ -801,7 +805,6 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
                             tuple.type, &gl_buffer[mip_map][buffer_offset]);
         break;
     case SurfaceTarget::TextureCubemap: {
-        std::size_t start = buffer_offset;
         for (std::size_t face = 0; face < params.depth; ++face) {
             glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
                                 static_cast<GLsizei>(rect.GetWidth()),
@@ -845,11 +848,12 @@ void CachedSurface::EnsureTextureDiscrepantView() {
 }
 
 MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
-void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
+void CachedSurface::UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem,
+                                    GLuint read_fb_handle, GLuint draw_fb_handle) {
     MICROPROFILE_SCOPE(OpenGL_TextureUL);
 
     for (u32 i = 0; i < params.max_mip_level; i++)
-        UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
+        UploadGLMipmapTexture(res_cache_tmp_mem, i, read_fb_handle, draw_fb_handle);
 }
 
 void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
@@ -929,8 +933,8 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
 }
 
 void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
-    surface->LoadGLBuffer();
-    surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
+    surface->LoadGLBuffer(temporal_memory);
+    surface->UploadGLTexture(temporal_memory, read_framebuffer.handle, draw_framebuffer.handle);
     surface->MarkAsModified(false, *this);
     surface->MarkForReload(false);
 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index db280dbb3..6263ef3e7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -355,6 +355,12 @@ namespace OpenGL {
 
 class RasterizerOpenGL;
 
+// This is used to store temporary big buffers,
+// instead of creating/destroying all the time
+struct RasterizerTemporaryMemory {
+    std::vector<std::vector<u8>> gl_buffer;
+};
+
 class CachedSurface final : public RasterizerCacheObject {
 public:
     explicit CachedSurface(const SurfaceParams& params);
@@ -371,10 +377,6 @@ public:
         return memory_size;
     }
 
-    void Flush() override {
-        FlushGLBuffer();
-    }
-
     const OGLTexture& Texture() const {
         return texture;
     }
@@ -397,11 +399,12 @@ public:
     }
 
     // Read/Write data in Switch memory to/from gl_buffer
-    void LoadGLBuffer();
-    void FlushGLBuffer();
+    void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
+    void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
 
     // Upload data in gl_buffer to this surface's texture
-    void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
+    void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle,
+                         GLuint draw_fb_handle);
 
     void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
                        Tegra::Texture::SwizzleSource swizzle_y,
@@ -429,13 +432,13 @@ public:
     }
 
 private:
-    void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
+    void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
+                               GLuint read_fb_handle, GLuint draw_fb_handle);
     void EnsureTextureDiscrepantView();
 
     OGLTexture texture;
     OGLTexture discrepant_view;
-    std::vector<std::vector<u8>> gl_buffer;
     SurfaceParams params{};
     GLenum gl_target{};
     GLenum gl_internal_format{};
@@ -473,6 +476,11 @@ public:
     void SignalPreDrawCall();
     void SignalPostDrawCall();
 
+protected:
+    void FlushObjectInner(const Surface& object) override {
+        object->FlushGLBuffer(temporal_memory);
+    }
+
 private:
     void LoadSurface(const Surface& surface);
     Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
@@ -519,6 +527,8 @@ private:
     std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
     Surface last_depth_buffer;
 
+    RasterizerTemporaryMemory temporal_memory;
+
     using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
     using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index b1c8f7c35..f700dc89a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -345,7 +345,7 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
 
 ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
                                      const Device& device)
-    : RasterizerCache{rasterizer}, disk_cache{system}, device{device} {}
+    : RasterizerCache{rasterizer}, device{device}, disk_cache{system} {}
 
 void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
                                       const VideoCore::DiskResourceLoadCallback& callback) {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index a332087f8..31b979987 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -57,9 +57,6 @@ public:
         return shader_length;
     }
 
-    // We do not have to flush this cache as things in it are never modified by us.
-    void Flush() override {}
-
     /// Gets the shader entries for the shader
     const GLShader::ShaderEntries& GetShaderEntries() const {
         return entries;
@@ -123,6 +120,10 @@ public:
     /// Gets the current specified shader stage program
     Shader GetStageProgram(Maxwell::ShaderProgram program);
 
+protected:
+    // We do not have to flush this cache as things in it are never modified by us.
+    void FlushObjectInner(const Shader& object) override {}
+
 private:
     std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
         const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index ef1a1995f..1a62795e1 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -871,17 +871,6 @@ private:
         return {};
     }
 
-    std::string Composite(Operation operation) {
-        std::string value = "vec4(";
-        for (std::size_t i = 0; i < 4; ++i) {
-            value += Visit(operation[i]);
-            if (i < 3)
-                value += ", ";
-        }
-        value += ')';
-        return value;
-    }
-
     template <Type type>
     std::string Add(Operation operation) {
         return GenerateBinaryInfix(operation, "+", type, type, type);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 6abf948f8..7ab0b4553 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -33,14 +33,14 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
 };
 )";
 
-    ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
     ProgramResult program =
         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
 
     out += program.first;
 
     if (setup.IsDualProgram()) {
-        ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
+        const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
         ProgramResult program_b =
             Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
 
@@ -76,7 +76,7 @@ void main() {
     }
 })";
 
-    return {out, program.second};
+    return {std::move(out), std::move(program.second)};
 }
 
 ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
@@ -97,7 +97,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
 };
 )";
 
-    ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
     ProgramResult program =
         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
 
     out += program.first;
@@ -107,7 +107,7 @@ void main() {
     execute_geometry();
 };)";
 
-    return {out, program.second};
+    return {std::move(out), std::move(program.second)};
 }
 
 ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
@@ -160,7 +160,7 @@ bool AlphaFunc(in float value) {
 }
 )";
 
-    ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
     ProgramResult program =
         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
@@ -172,7 +172,7 @@ void main() {
 }
 )";
 
-    return {out, program.second};
+    return {std::move(out), std::move(program.second)};
 }
 
 } // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 95b773135..ed7b5cff0 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -126,6 +126,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
         return GL_TRIANGLES;
     case Maxwell::PrimitiveTopology::TriangleStrip:
         return GL_TRIANGLE_STRIP;
+    case Maxwell::PrimitiveTopology::TriangleFan:
+        return GL_TRIANGLE_FAN;
     default:
         LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
         UNREACHABLE();
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 08b786aad..3edf460df 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -49,9 +49,6 @@ public:
         return alignment;
     }
 
-    // We do not have to flush this cache as things in it are never modified by us.
-    void Flush() override {}
-
 private:
     VAddr cpu_addr{};
     std::size_t size{};
@@ -87,6 +84,10 @@ public:
        return buffer_handle;
     }
 
+protected:
+    // We do not have to flush this cache as things in it are never modified by us.
+    void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
+
 private:
     void AlignBuffer(std::size_t alignment);
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 23d9b10db..a11000f6b 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -315,7 +315,6 @@ private:
         constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
                                                                          "overflow"};
         for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
-            const auto flag_code = static_cast<InternalFlag>(flag);
             const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
             internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
         }
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 8b574d4e5..5b033126d 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -540,7 +540,6 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
 
 Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
                             bool is_array, bool is_aoffi) {
     const std::size_t coord_count = GetCoordCount(texture_type);
-    const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
 
     // If enabled arrays index is always stored in the gpr8 field
     const u64 array_register = instr.gpr8.Value();
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index eafb6b73a..a9b8f69af 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -25,8 +25,8 @@ class InputBitStream {
 public:
-    explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
-        : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
+    explicit InputBitStream(const unsigned char* ptr, int start_offset = 0)
+        : m_CurByte(ptr), m_NextBit(start_offset % 8) {}
 
     ~InputBitStream() = default;
 
@@ -55,12 +55,9 @@ public:
     }
 
 private:
-    const int m_NumBits;
     const unsigned char* m_CurByte;
     int m_NextBit = 0;
     int m_BitsRead = 0;
-
-    bool done = false;
 };
 
 class OutputBitStream {
@@ -114,7 +111,6 @@ private:
     const int m_NumBits;
     unsigned char* m_CurByte;
     int m_NextBit = 0;
-    int m_BitsRead = 0;
 
     bool done = false;
 };
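A note on the recurring cache change above: the patch removes the per-object virtual Flush() from RasterizerCacheObject and instead has each cache implement a protected FlushObjectInner() hook that RasterizerCache::FlushObject() calls (a no-op for the buffer and shader caches, a real write-back for surfaces and global regions). Below is a minimal, self-contained sketch of that shape using simplified stand-in types, not the actual yuzu classes (which also carry locking and tick bookkeeping):

```cpp
#include <memory>

// Simplified stand-in for a cached resource.
struct CachedObject {
    bool IsDirty() const { return dirty; }
    void MarkAsModified(bool state) { dirty = state; }
    bool dirty = true;
};

// The cache owns the flush policy through a protected hook, rather than the
// cached object exposing a virtual Flush() of its own.
template <class T>
class RasterizerCache {
protected:
    // Each concrete cache decides how (or whether) an object is written back.
    virtual void FlushObjectInner(const T& object) = 0;

    void FlushObject(const T& object) {
        if (!object->IsDirty()) {
            return;
        }
        FlushObjectInner(object);
        object->MarkAsModified(false);
    }
};

using Entry = std::shared_ptr<CachedObject>;

class BufferCache : public RasterizerCache<Entry> {
protected:
    // Nothing in this cache is ever modified by the emulator, so there is
    // nothing to write back.
    void FlushObjectInner(const Entry& object) override {}
};
```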