From 4415e00181f71b35eea779157976773d9bccf638 Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 24 Apr 2018 00:19:36 -0400 Subject: gl_rasterizer_cache: Update to be based on GPU addresses, not CPU addresses. --- src/video_core/memory_manager.h | 1 - src/video_core/rasterizer_interface.h | 7 +-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 13 +++-- src/video_core/renderer_opengl/gl_rasterizer.h | 7 +-- .../renderer_opengl/gl_rasterizer_cache.cpp | 63 +++++++++++++--------- .../renderer_opengl/gl_rasterizer_cache.h | 27 ++++++---- src/video_core/renderer_opengl/renderer_opengl.cpp | 3 +- src/video_core/textures/decoders.cpp | 1 + 8 files changed, 72 insertions(+), 50 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 7d745101f..08140c83a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -11,7 +11,6 @@ #include #include "common/common_types.h" -#include "core/memory.h" namespace Tegra { diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 36629dd11..f0e48a802 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -6,6 +6,7 @@ #include "common/common_types.h" #include "video_core/gpu.h" +#include "video_core/memory_manager.h" struct ScreenInfo; @@ -25,14 +26,14 @@ public: virtual void FlushAll() = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - virtual void FlushRegion(VAddr addr, u64 size) = 0; + virtual void FlushRegion(Tegra::GPUVAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - virtual void InvalidateRegion(VAddr addr, u64 size) = 0; + virtual void InvalidateRegion(Tegra::GPUVAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated - virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; + virtual void FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) = 0; /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 virtual bool AccelerateDisplayTransfer(const void* config) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 71612790b..bc9368877 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -150,9 +150,8 @@ std::pair RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, u64 size = end - start + 1; // Copy vertex array data - const VAddr data_addr{*memory_manager->GpuToCpuAddress(start)}; - res_cache.FlushRegion(data_addr, size, nullptr); - Memory::ReadBlock(data_addr, array_ptr, size); + res_cache.FlushRegion(start, size, nullptr); + Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); // Bind the vertex array to the buffer at the current offset. glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); @@ -519,17 +518,17 @@ void RasterizerOpenGL::FlushAll() { res_cache.FlushAll(); } -void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); } -void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.InvalidateRegion(addr, size, nullptr); } -void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); res_cache.InvalidateRegion(addr, size, nullptr); @@ -560,7 +559,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu MICROPROFILE_SCOPE(OpenGL_CacheManagement); SurfaceParams src_params; - src_params.addr = framebuffer_addr; + src_params.cpu_addr = framebuffer_addr; src_params.width = std::min(framebuffer.width, pixel_stride); src_params.height = framebuffer.height; src_params.stride = pixel_stride; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 544714b95..9709e595e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -11,6 +11,7 @@ #include #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -29,9 +30,9 @@ public: void DrawArrays() override; void NotifyMaxwellRegisterChanged(u32 method) override; void FlushAll() override; - void FlushRegion(VAddr addr, u64 size) override; - void InvalidateRegion(VAddr addr, u64 size) override; - void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + void FlushRegion(Tegra::GPUVAddr addr, u64 size) override; + void InvalidateRegion(Tegra::GPUVAddr addr, u64 size) override; + void FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) override; bool AccelerateDisplayTransfer(const void* config) override; bool AccelerateTextureCopy(const void* config) override; bool AccelerateFill(const void* config) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index ced648c12..d139d51e9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -83,26 +83,30 @@ static u16 GetResolutionScaleFactor() { } template -void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr base, VAddr start, - VAddr end) { +void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base, + Tegra::GPUVAddr start, Tegra::GPUVAddr end) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); + const auto& gpu = Core::System::GetInstance().GPU(); if (morton_to_gl) { auto data = Tegra::Texture::UnswizzleTexture( - base, SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, - block_height); + *gpu.memory_manager->GpuToCpuAddress(base), + SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); std::memcpy(gl_buffer, data.data(), data.size()); } else { // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check // the configuration for this and perform more generic un/swizzle LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); - VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, - Memory::GetPointer(base), gl_buffer, morton_to_gl); + VideoCore::MortonCopyPixels128( + stride, height, bytes_per_pixel, gl_bytes_per_pixel, + Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(base)), gl_buffer, + morton_to_gl); } } -static constexpr std::array morton_to_gl_fns = { MortonCopy, MortonCopy, @@ -110,7 +114,8 @@ static constexpr std::array, MortonCopy, }; -static constexpr std::array gl_to_morton_fns = { MortonCopy, @@ -219,9 +224,9 @@ SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { SurfaceParams params = *this; const u32 tiled_size = is_tiled ? 8 : 1; const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); - VAddr aligned_start = + Tegra::GPUVAddr aligned_start = addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); - VAddr aligned_end = + Tegra::GPUVAddr aligned_end = addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); if (aligned_end - aligned_start > stride_tiled_bytes) { @@ -342,6 +347,13 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval(); } +VAddr SurfaceParams::GetCpuAddr() const { + // When this function is used, only cpu_addr or (GPU) addr should be set, not both + ASSERT(!(cpu_addr && addr)); + const auto& gpu = Core::System::GetInstance().GPU(); + return cpu_addr.get_value_or(*gpu.memory_manager->GpuToCpuAddress(addr)); +} + bool CachedSurface::CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const { if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && @@ -456,10 +468,10 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac } MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); -void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { +void CachedSurface::LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end) { ASSERT(type != SurfaceType::Fill); - u8* const texture_src_data = Memory::GetPointer(addr); + u8* const texture_src_data = Memory::GetPointer(GetCpuAddr()); if (texture_src_data == nullptr) return; @@ -485,8 +497,8 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) { - u8* const dst_buffer = Memory::GetPointer(addr); +void CachedSurface::FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end) { + u8* const dst_buffer = Memory::GetPointer(GetCpuAddr()); if (dst_buffer == nullptr) return; @@ -1028,7 +1040,7 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu auto& gpu = Core::System::GetInstance().GPU(); SurfaceParams params; - params.addr = *gpu.memory_manager->GpuToCpuAddress(config.tic.Address()); + params.addr = config.tic.Address(); params.width = config.tic.Width(); params.height = config.tic.Height(); params.is_tiled = config.tic.IsTiled(); @@ -1045,7 +1057,7 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu params.block_height = config.tic.BlockHeight(); } else { // Use the texture-provided stride value if the texture isn't tiled. - params.stride = params.PixelsInBytes(config.tic.Pitch()); + params.stride = static_cast(params.PixelsInBytes(config.tic.Pitch())); } params.UpdateParams(); @@ -1073,7 +1085,6 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle& viewport) { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; - const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; const auto& config = regs.rt[0]; // TODO(bunnei): This is hard corded to use just the first render buffer @@ -1106,7 +1117,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; SurfaceParams depth_params = color_params; - color_params.addr = *memory_manager->GpuToCpuAddress(config.Address()); + color_params.addr = config.Address(); color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format); color_params.UpdateParams(); @@ -1222,7 +1233,8 @@ void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, } } -void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, VAddr addr, u64 size) { +void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, + u64 size) { if (size == 0) return; @@ -1261,7 +1273,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, VAddr addr, } } -void RasterizerCacheOpenGL::FlushRegion(VAddr addr, u64 size, Surface flush_surface) { +void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface) { if (size == 0) return; @@ -1297,7 +1309,8 @@ void RasterizerCacheOpenGL::FlushAll() { FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); } -void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner) { +void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size, + const Surface& region_owner) { if (size == 0) return; @@ -1390,7 +1403,7 @@ void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); } -void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { +void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { const u64 num_pages = ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1; const u64 page_start = addr >> Memory::PAGE_BITS; @@ -1406,8 +1419,10 @@ void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int del const auto interval = pair.first & pages_interval; const int count = pair.second; - const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; - const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) + << Memory::PAGE_BITS; + const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) + << Memory::PAGE_BITS; const u64 interval_size = interval_end_addr - interval_start_addr; if (delta > 0 && count == delta) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index bf0fabb29..5f77f4e61 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -17,12 +17,14 @@ #ifdef __GNUC__ #pragma GCC diagnostic pop #endif +#include #include #include "common/assert.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" #include "video_core/gpu.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/textures/texture.h" @@ -30,9 +32,9 @@ struct CachedSurface; using Surface = std::shared_ptr; using SurfaceSet = std::set; -using SurfaceRegions = boost::icl::interval_set; -using SurfaceMap = boost::icl::interval_map; -using SurfaceCache = boost::icl::interval_map; +using SurfaceRegions = boost::icl::interval_set; +using SurfaceMap = boost::icl::interval_map; +using SurfaceCache = boost::icl::interval_map; using SurfaceInterval = SurfaceCache::interval_type; static_assert(std::is_same() && @@ -277,6 +279,8 @@ struct SurfaceParams { return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; } + VAddr GetCpuAddr() const; + bool ExactMatch(const SurfaceParams& other_surface) const; bool CanSubRect(const SurfaceParams& sub_surface) const; bool CanExpand(const SurfaceParams& expanded_surface) const; @@ -285,8 +289,9 @@ struct SurfaceParams { MathUtil::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; MathUtil::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const; - VAddr addr = 0; - VAddr end = 0; + Tegra::GPUVAddr addr = 0; + Tegra::GPUVAddr end = 0; + boost::optional cpu_addr; u64 size = 0; u32 width = 0; @@ -332,8 +337,8 @@ struct CachedSurface : SurfaceParams { size_t gl_buffer_size = 0; // Read/Write data in Switch memory to/from gl_buffer - void LoadGLBuffer(VAddr load_start, VAddr load_end); - void FlushGLBuffer(VAddr flush_start, VAddr flush_end); + void LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end); + void FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end); // Upload/Download data in gl_buffer in/to this surface's texture void UploadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, @@ -381,10 +386,10 @@ public: SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); /// Write any cached resources overlapping the region back to memory (if dirty) - void FlushRegion(VAddr addr, u64 size, Surface flush_surface = nullptr); + void FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface = nullptr); /// Mark region as being invalidated by region_owner (nullptr if Switch memory) - void InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner); + void InvalidateRegion(Tegra::GPUVAddr addr, u64 size, const Surface& region_owner); /// Flush all cached resources tracked by this cache manager void FlushAll(); @@ -393,7 +398,7 @@ private: void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); /// Update surface's texture for given region when necessary - void ValidateSurface(const Surface& surface, VAddr addr, u64 size); + void ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, u64 size); /// Create a new surface Surface CreateSurface(const SurfaceParams& params); @@ -405,7 +410,7 @@ private: void UnregisterSurface(const Surface& surface); /// Increase/decrease the number of surface in pages touching the specified region - void UpdatePagesCachedCount(VAddr addr, u64 size, int delta); + void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); SurfaceCache surface_cache; PageMap cached_pages; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index baff2c7af..a266e21cf 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -152,7 +152,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf screen_info.display_texture = screen_info.texture.resource.handle; screen_info.display_texcoords = MathUtil::Rectangle(0.f, 0.f, 1.f, 1.f); - Rasterizer()->FlushRegion(framebuffer_addr, size_in_bytes); + Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, + Memory::FlushMode::Flush); VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4, Memory::GetPointer(framebuffer_addr), diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index e0509f0ce..9c3ae875c 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -4,6 +4,7 @@ #include #include "common/assert.h" +#include "core/memory.h" #include "video_core/textures/decoders.h" #include "video_core/textures/texture.h" -- cgit v1.2.3