diff options
Diffstat (limited to 'src/video_core/renderer_opengl/gl_rasterizer_cache.cpp')
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 285 |
1 files changed, 178 insertions, 107 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 884637ca5..2ffbd3bab 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -21,9 +21,13 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/vector_math.h" +#include "core/core.h" #include "core/frontend/emu_window.h" +#include "core/hle/kernel/process.h" +#include "core/hle/kernel/vm_manager.h" #include "core/memory.h" #include "core/settings.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/utils.h" @@ -107,67 +111,28 @@ static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { } template <bool morton_to_gl, PixelFormat format> -static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr start, PAddr end) { +static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; - constexpr u32 tile_size = bytes_per_pixel * 64; - constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); - static_assert(gl_bytes_per_pixel >= bytes_per_pixel, ""); - gl_buffer += gl_bytes_per_pixel - bytes_per_pixel; - - const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); - const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); - const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size); - - ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end)); - - const u64 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel; - u32 x = static_cast<u32>((begin_pixel_index % (stride * 8)) / 8); - u32 y = static_cast<u32>((begin_pixel_index / (stride * 8)) * 8); - - gl_buffer += ((height 
- 8 - y) * stride + x) * gl_bytes_per_pixel; - - auto glbuf_next_tile = [&] { - x = (x + 8) % stride; - gl_buffer += 8 * gl_bytes_per_pixel; - if (!x) { - y += 8; - gl_buffer -= stride * 9 * gl_bytes_per_pixel; - } - }; - u8* tile_buffer = Memory::GetPhysicalPointer(start); - - if (start < aligned_start && !morton_to_gl) { - std::array<u8, tile_size> tmp_buf; - MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer); - std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start], - std::min(aligned_start, end) - start); - - tile_buffer += aligned_start - start; - glbuf_next_tile(); - } - - const u8* const buffer_end = tile_buffer + aligned_end - aligned_start; - while (tile_buffer < buffer_end) { - MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gl_buffer); - tile_buffer += tile_size; - glbuf_next_tile(); - } - - if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) { - std::array<u8, tile_size> tmp_buf; - MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer); - std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end); - } + // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). 
We should check the + // configuration for this and perform more generic un/swizzle + LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); + VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, + Memory::GetPointer(base), gl_buffer, morton_to_gl); } -static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> morton_to_gl_fns = { - MortonCopy<true, PixelFormat::RGBA8>, // 0 - MortonCopy<true, PixelFormat::RGB8>, // 1 - MortonCopy<true, PixelFormat::RGB5A1>, // 2 - MortonCopy<true, PixelFormat::RGB565>, // 3 - MortonCopy<true, PixelFormat::RGBA4>, // 4 +static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 18> morton_to_gl_fns = { + MortonCopy<true, PixelFormat::RGBA8>, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, nullptr, nullptr, nullptr, @@ -176,19 +141,19 @@ static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> mo nullptr, nullptr, nullptr, - nullptr, // 5 - 13 - MortonCopy<true, PixelFormat::D16>, // 14 - nullptr, // 15 - MortonCopy<true, PixelFormat::D24>, // 16 - MortonCopy<true, PixelFormat::D24S8> // 17 }; -static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gl_to_morton_fns = { - MortonCopy<false, PixelFormat::RGBA8>, // 0 - MortonCopy<false, PixelFormat::RGB8>, // 1 - MortonCopy<false, PixelFormat::RGB5A1>, // 2 - MortonCopy<false, PixelFormat::RGB565>, // 3 - MortonCopy<false, PixelFormat::RGBA4>, // 4 +static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 18> gl_to_morton_fns = { + MortonCopy<false, PixelFormat::RGBA8>, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, nullptr, nullptr, nullptr, @@ -197,11 +162,6 @@ static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gl nullptr, nullptr, nullptr, - nullptr, // 5 - 13 - MortonCopy<false, PixelFormat::D16>, // 14 - 
nullptr, // 15 - MortonCopy<false, PixelFormat::D24>, // 16 - MortonCopy<false, PixelFormat::D24S8> // 17 }; // Allocate an uninitialized texture of appropriate size and format for the surface @@ -290,17 +250,17 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec static bool FillSurface(const Surface& surface, const u8* fill_data, const MathUtil::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) { - UNIMPLEMENTED(); - return true; + UNREACHABLE(); + return {}; } SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { SurfaceParams params = *this; const u32 tiled_size = is_tiled ? 8 : 1; const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); - PAddr aligned_start = + VAddr aligned_start = addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); - PAddr aligned_end = + VAddr aligned_end = addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); if (aligned_end - aligned_start > stride_tiled_bytes) { @@ -527,10 +487,10 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac } MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); -void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { +void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { ASSERT(type != SurfaceType::Fill); - const u8* const texture_src_data = Memory::GetPhysicalPointer(addr); + u8* const texture_src_data = Memory::GetPointer(addr); if (texture_src_data == nullptr) return; @@ -539,35 +499,30 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { gl_buffer.reset(new u8[gl_buffer_size]); } - // TODO: Should probably be done in ::Memory:: and check for other regions too - if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END) - load_end = Memory::VRAM_VADDR_END; - - if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR) - load_start = 
Memory::VRAM_VADDR; - MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); ASSERT(load_start >= addr && load_end <= end); - const u32 start_offset = load_start - addr; + const u64 start_offset = load_start - addr; if (!is_tiled) { ASSERT(type == SurfaceType::Color); - std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, - load_end - load_start); + const u32 bytes_per_pixel{GetFormatBpp() >> 3}; + + // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check + // the configuration for this and perform more generic un/swizzle + LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); + VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4, + texture_src_data + start_offset, &gl_buffer[start_offset], + true); } else { - if (type == SurfaceType::Texture) { - UNIMPLEMENTED(); - } else { - morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, - load_start, load_end); - } + morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, + load_start, load_end); } } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { - u8* const dst_buffer = Memory::GetPhysicalPointer(addr); +void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) { + u8* const dst_buffer = Memory::GetPointer(addr); if (dst_buffer == nullptr) return; @@ -1102,18 +1057,106 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& } Surface RasterizerCacheOpenGL::GetTextureSurface(const void* config) { - UNIMPLEMENTED(); + UNREACHABLE(); return {}; } SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( - bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport_rect) { - UNIMPLEMENTED(); - return {}; + bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) { + const 
auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; + const auto& config = regs.rt[0]; + + // TODO(bunnei): This is hard-coded to use just the first render buffer + LOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); + + // update resolution_scale_factor and reset cache if changed + // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We + // need to fix this before making the renderer multi-threaded. + static u16 resolution_scale_factor = GetResolutionScaleFactor(); + if (resolution_scale_factor != GetResolutionScaleFactor()) { + resolution_scale_factor = GetResolutionScaleFactor(); + FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); + } + + MathUtil::Rectangle<u32> viewport_clamped{ + static_cast<u32>(MathUtil::Clamp(viewport.left, 0, static_cast<s32>(config.width))), + static_cast<u32>(MathUtil::Clamp(viewport.top, 0, static_cast<s32>(config.height))), + static_cast<u32>(MathUtil::Clamp(viewport.right, 0, static_cast<s32>(config.width))), + static_cast<u32>(MathUtil::Clamp(viewport.bottom, 0, static_cast<s32>(config.height)))}; + + // get color and depth surfaces + SurfaceParams color_params; + color_params.is_tiled = true; + color_params.res_scale = resolution_scale_factor; + color_params.width = config.width; + color_params.height = config.height; + SurfaceParams depth_params = color_params; + + color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address()); + color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); + color_params.UpdateParams(); + + ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); + // depth_params.addr = config.GetDepthBufferPhysicalAddress(); + // depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); + // depth_params.UpdateParams(); + + 
auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); + auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); + + // Make sure that framebuffers don't overlap if both color and depth are being used + if (using_color_fb && using_depth_fb && + boost::icl::length(color_vp_interval & depth_vp_interval)) { + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " + "overlapping framebuffers not supported!"); + using_depth_fb = false; + } + + MathUtil::Rectangle<u32> color_rect{}; + Surface color_surface = nullptr; + if (using_color_fb) + std::tie(color_surface, color_rect) = + GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + + MathUtil::Rectangle<u32> depth_rect{}; + Surface depth_surface = nullptr; + if (using_depth_fb) + std::tie(depth_surface, depth_rect) = + GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + + MathUtil::Rectangle<u32> fb_rect{}; + if (color_surface != nullptr && depth_surface != nullptr) { + fb_rect = color_rect; + // Color and Depth surfaces must have the same dimensions and offsets + if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || + color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { + color_surface = GetSurface(color_params, ScaleMatch::Exact, false); + depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); + fb_rect = color_surface->GetScaledRect(); + } + } else if (color_surface != nullptr) { + fb_rect = color_rect; + } else if (depth_surface != nullptr) { + fb_rect = depth_rect; + } + + if (color_surface != nullptr) { + ValidateSurface(color_surface, boost::icl::first(color_vp_interval), + boost::icl::length(color_vp_interval)); + } + if (depth_surface != nullptr) { + ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), + boost::icl::length(depth_vp_interval)); + } + + return std::make_tuple(color_surface, depth_surface, fb_rect); } Surface 
RasterizerCacheOpenGL::GetFillSurface(const void* config) { - UNIMPLEMENTED(); + UNREACHABLE(); return {}; } @@ -1167,7 +1210,7 @@ void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, } } -void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u64 size) { +void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, VAddr addr, u64 size) { if (size == 0) return; @@ -1227,7 +1270,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, } } -void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u64 size, Surface flush_surface) { +void RasterizerCacheOpenGL::FlushRegion(VAddr addr, u64 size, Surface flush_surface) { if (size == 0) return; @@ -1260,10 +1303,10 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u64 size, Surface flush_surf } void RasterizerCacheOpenGL::FlushAll() { - FlushRegion(0, 0xFFFFFFFF); + FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); } -void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u64 size, const Surface& region_owner) { +void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner) { if (size == 0) return; @@ -1356,6 +1399,34 @@ void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); } -void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u64 size, int delta) { - UNIMPLEMENTED(); +void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { + const u64 num_pages = + ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1; + const u64 page_start = addr >> Memory::PAGE_BITS; + const u64 page_end = page_start + num_pages; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to + // subtract after iterating + const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) + 
cached_pages.add({pages_interval, delta}); + + for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const u64 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); + else if (delta < 0 && count == -delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + else + ASSERT(count >= 0); + } + + if (delta < 0) + cached_pages.add({pages_interval, delta}); } |