diff options
Diffstat (limited to 'src/video_core')
29 files changed, 2218 insertions, 541 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e56253c4c..a710c4bc5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,6 +1,8 @@ add_library(video_core STATIC command_processor.cpp command_processor.h + debug_utils/debug_utils.cpp + debug_utils/debug_utils.h engines/fermi_2d.cpp engines/fermi_2d.h engines/maxwell_3d.cpp @@ -9,6 +11,8 @@ add_library(video_core STATIC engines/maxwell_compute.h gpu.cpp gpu.h + macro_interpreter.cpp + macro_interpreter.h memory_manager.cpp memory_manager.h rasterizer_interface.h @@ -29,8 +33,12 @@ add_library(video_core STATIC renderer_opengl/gl_state.h renderer_opengl/gl_stream_buffer.cpp renderer_opengl/gl_stream_buffer.h + renderer_opengl/maxwell_to_gl.h renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.h + textures/decoders.cpp + textures/decoders.h + textures/texture.h utils.h video_core.cpp video_core.h diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp new file mode 100644 index 000000000..22d44aab2 --- /dev/null +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -0,0 +1,64 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include <algorithm> +#include <condition_variable> +#include <cstdint> +#include <cstring> +#include <fstream> +#include <map> +#include <mutex> +#include <string> + +#include "common/assert.h" +#include "common/bit_field.h" +#include "common/color.h" +#include "common/common_types.h" +#include "common/file_util.h" +#include "common/logging/log.h" +#include "common/math_util.h" +#include "common/vector_math.h" +#include "video_core/debug_utils/debug_utils.h" + +namespace Tegra { + +void DebugContext::DoOnEvent(Event event, void* data) { + { + std::unique_lock<std::mutex> lock(breakpoint_mutex); + + // TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will + // show on debug widgets + + // TODO: Should stop the CPU thread here once we multithread emulation. + + active_breakpoint = event; + at_breakpoint = true; + + // Tell all observers that we hit a breakpoint + for (auto& breakpoint_observer : breakpoint_observers) { + breakpoint_observer->OnMaxwellBreakPointHit(event, data); + } + + // Wait until another thread tells us to Resume() + resume_from_breakpoint.wait(lock, [&] { return !at_breakpoint; }); + } +} + +void DebugContext::Resume() { + { + std::lock_guard<std::mutex> lock(breakpoint_mutex); + + // Tell all observers that we are about to resume + for (auto& breakpoint_observer : breakpoint_observers) { + breakpoint_observer->OnMaxwellResume(); + } + + // Resume the waiting thread (i.e. OnEvent()) + at_breakpoint = false; + } + + resume_from_breakpoint.notify_one(); +} + +} // namespace Tegra diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h new file mode 100644 index 000000000..bbba8e380 --- /dev/null +++ b/src/video_core/debug_utils/debug_utils.h @@ -0,0 +1,163 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#pragma once + +#include <algorithm> +#include <array> +#include <condition_variable> +#include <iterator> +#include <list> +#include <map> +#include <memory> +#include <mutex> +#include <string> +#include <utility> +#include <vector> +#include "common/common_types.h" +#include "common/vector_math.h" + +namespace Tegra { + +class DebugContext { +public: + enum class Event { + FirstEvent = 0, + + MaxwellCommandLoaded = FirstEvent, + MaxwellCommandProcessed, + IncomingPrimitiveBatch, + FinishedPrimitiveBatch, + + NumEvents + }; + + /** + * Inherit from this class to be notified of events registered to some debug context. + * Most importantly this is used for our debugger GUI. + * + * To implement event handling, override the OnMaxwellBreakPointHit and OnMaxwellResume methods. + * @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state + * access + * @todo Evaluate an alternative interface, in which there is only one managing observer and + * multiple child observers running (by design) on the same thread. + */ + class BreakPointObserver { + public: + /// Constructs the object such that it observes events of the given DebugContext. + BreakPointObserver(std::shared_ptr<DebugContext> debug_context) + : context_weak(debug_context) { + std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex); + debug_context->breakpoint_observers.push_back(this); + } + + virtual ~BreakPointObserver() { + auto context = context_weak.lock(); + if (context) { + std::unique_lock<std::mutex> lock(context->breakpoint_mutex); + context->breakpoint_observers.remove(this); + + // If we are the last observer to be destroyed, tell the debugger context that + // it is free to continue. In particular, this is required for a proper yuzu + // shutdown, when the emulation thread is waiting at a breakpoint. + if (context->breakpoint_observers.empty()) + context->Resume(); + } + } + + /** + * Action to perform when a breakpoint was reached. + * @param event Type of event which triggered the breakpoint + * @param data Optional data pointer (if unused, this is a nullptr) + * @note This function will perform nothing unless it is overridden in the child class. + */ + virtual void OnMaxwellBreakPointHit(Event event, void* data) {} + + /** + * Action to perform when emulation is resumed from a breakpoint. + * @note This function will perform nothing unless it is overridden in the child class. + */ + virtual void OnMaxwellResume() {} + + protected: + /** + * Weak context pointer. This need not be valid, so when requesting a shared_ptr via + * context_weak.lock(), always compare the result against nullptr. + */ + std::weak_ptr<DebugContext> context_weak; + }; + + /** + * Simple structure defining a breakpoint state + */ + struct BreakPoint { + bool enabled = false; + }; + + /** + * Static constructor used to create a shared_ptr of a DebugContext. + */ + static std::shared_ptr<DebugContext> Construct() { + return std::shared_ptr<DebugContext>(new DebugContext); + } + + /** + * Used by the emulation core when a given event has happened. If a breakpoint has been set + * for this event, OnEvent calls the event handlers of the registered breakpoint observers. + * The current thread then is halted until Resume() is called from another thread (or until + * emulation is stopped). + * @param event Event which has happened + * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until + * Resume() is called. + */ + void OnEvent(Event event, void* data) { + // This check is left in the header to allow the compiler to inline it. + if (!breakpoints[(int)event].enabled) + return; + // For the rest of event handling, call a separate function. + DoOnEvent(event, data); + } + + void DoOnEvent(Event event, void* data); + + /** + * Resume from the current breakpoint. + * @warning Calling this from the same thread that OnEvent was called in will cause a deadlock. + * Calling from any other thread is safe. + */ + void Resume(); + + /** + * Delete all set breakpoints and resume emulation. + */ + void ClearBreakpoints() { + for (auto& bp : breakpoints) { + bp.enabled = false; + } + Resume(); + } + + // TODO: Evaluate if access to these members should be hidden behind a public interface. + std::array<BreakPoint, (int)Event::NumEvents> breakpoints; + Event active_breakpoint; + bool at_breakpoint = false; + +private: + /** + * Private default constructor to make sure people always construct this through Construct() + * instead. + */ + DebugContext() = default; + + /// Mutex protecting current breakpoint state and the observer list. + std::mutex breakpoint_mutex; + + /// Used by OnEvent to wait for resumption. + std::condition_variable resume_from_breakpoint; + + /// List of registered observers + std::list<BreakPointObserver*> breakpoint_observers; +}; + +} // namespace Tegra diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 4d9745e48..124753032 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -2,8 +2,16 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <cinttypes> #include "common/assert.h" +#include "core/core.h" +#include "video_core/debug_utils/debug_utils.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_base.h" +#include "video_core/textures/decoders.h" +#include "video_core/textures/texture.h" +#include "video_core/video_core.h" namespace Tegra { namespace Engines { @@ -11,41 +19,29 @@ namespace Engines { /// First register id that is actually a Macro call. constexpr u32 MacroRegistersStart = 0xE00; -const std::unordered_map<u32, Maxwell3D::MethodInfo> Maxwell3D::method_handlers = { - {0xE1A, {"BindTextureInfoBuffer", 1, &Maxwell3D::BindTextureInfoBuffer}}, - {0xE24, {"SetShader", 5, &Maxwell3D::SetShader}}, - {0xE2A, {"BindStorageBuffer", 1, &Maxwell3D::BindStorageBuffer}}, -}; - -Maxwell3D::Maxwell3D(MemoryManager& memory_manager) : memory_manager(memory_manager) {} +Maxwell3D::Maxwell3D(MemoryManager& memory_manager) + : memory_manager(memory_manager), macro_interpreter(*this) {} void Maxwell3D::SubmitMacroCode(u32 entry, std::vector<u32> code) { uploaded_macros[entry * 2 + MacroRegistersStart] = std::move(code); } -void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) { - // TODO(Subv): Write an interpreter for the macros uploaded via registers 0x45 and 0x47 - +void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { + auto macro_code = uploaded_macros.find(method); // The requested macro must have been uploaded already. - ASSERT_MSG(uploaded_macros.find(method) != uploaded_macros.end(), "Macro %08X was not uploaded", - method); - - auto itr = method_handlers.find(method); - ASSERT_MSG(itr != method_handlers.end(), "Unhandled method call %08X", method); - - ASSERT(itr->second.arguments == parameters.size()); + ASSERT_MSG(macro_code != uploaded_macros.end(), "Macro %08X was not uploaded", method); - (this->*itr->second.handler)(parameters); - - // Reset the current macro and its parameters. + // Reset the current macro and execute it. executing_macro = 0; - macro_params.clear(); + macro_interpreter.Execute(macro_code->second, std::move(parameters)); } void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register, increase the size of the Regs structure"); + auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); + // It is an error to write to a register other than the current macro's ARG register before it // has finished execution. if (executing_macro != 0) { @@ -67,11 +63,15 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { // Call the macro when there are no more parameters in the command buffer if (remaining_params == 0) { - CallMacroMethod(executing_macro, macro_params); + CallMacroMethod(executing_macro, std::move(macro_params)); } return; } + if (debug_context) { + debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); + } + regs.reg_array[method] = value; #define MAXWELL3D_REG_INDEX(field_name) (offsetof(Regs, field_name) / sizeof(u32)) @@ -137,6 +137,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { } #undef MAXWELL3D_REG_INDEX + + if (debug_context) { + debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr); + } } void Maxwell3D::ProcessQueryGet() { @@ -159,85 +163,20 @@ void Maxwell3D::ProcessQueryGet() { } void Maxwell3D::DrawArrays() { - LOG_WARNING(HW_GPU, "Game requested a DrawArrays, ignoring"); -} + LOG_DEBUG(HW_GPU, "called, topology=%d, count=%d", regs.draw.topology.Value(), + regs.vertex_buffer.count); -void Maxwell3D::BindTextureInfoBuffer(const std::vector<u32>& parameters) { - /** - * Parameters description: - * [0] = Shader stage, usually 4 for FragmentShader - */ + auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); - u32 stage = parameters[0]; - - // Perform the same operations as the real macro code. - GPUVAddr address = static_cast<GPUVAddr>(regs.tex_info_buffers.address[stage]) << 8; - u32 size = regs.tex_info_buffers.size[stage]; - - regs.const_buffer.cb_size = size; - regs.const_buffer.cb_address_high = address >> 32; - regs.const_buffer.cb_address_low = address & 0xFFFFFFFF; -} - -void Maxwell3D::SetShader(const std::vector<u32>& parameters) { - /** - * Parameters description: - * [0] = Shader Program. - * [1] = Unknown, presumably the shader id. - * [2] = Offset to the start of the shader, after the 0x30 bytes header. - * [3] = Shader Stage. - * [4] = Const Buffer Address >> 8. - */ - auto shader_program = static_cast<Regs::ShaderProgram>(parameters[0]); - // TODO(Subv): This address is probably an offset from the CODE_ADDRESS register. - GPUVAddr address = parameters[2]; - auto shader_stage = static_cast<Regs::ShaderStage>(parameters[3]); - GPUVAddr cb_address = parameters[4] << 8; - - auto& shader = state.shader_programs[static_cast<size_t>(shader_program)]; - shader.program = shader_program; - shader.stage = shader_stage; - shader.address = address; - - // Perform the same operations as the real macro code. - // TODO(Subv): Early exit if register 0xD1C + shader_program contains the same as params[1]. - auto& shader_regs = regs.shader_config[static_cast<size_t>(shader_program)]; - shader_regs.start_id = address; - // TODO(Subv): Write params[1] to register 0xD1C + shader_program. - // TODO(Subv): Write params[2] to register 0xD22 + shader_program. - - // Note: This value is hardcoded in the macro's code. - static constexpr u32 DefaultCBSize = 0x10000; - regs.const_buffer.cb_size = DefaultCBSize; - regs.const_buffer.cb_address_high = cb_address >> 32; - regs.const_buffer.cb_address_low = cb_address & 0xFFFFFFFF; - - // Write a hardcoded 0x11 to CB_BIND, this binds the current const buffer to buffer c1[] in the - // shader. It's likely that these are the constants for the shader. - regs.cb_bind[static_cast<size_t>(shader_stage)].valid.Assign(1); - regs.cb_bind[static_cast<size_t>(shader_stage)].index.Assign(1); - - ProcessCBBind(shader_stage); -} - -void Maxwell3D::BindStorageBuffer(const std::vector<u32>& parameters) { - /** - * Parameters description: - * [0] = Buffer offset >> 2 - */ - - u32 buffer_offset = parameters[0] << 2; - - // Perform the same operations as the real macro code. - // Note: This value is hardcoded in the macro's code. - static constexpr u32 DefaultCBSize = 0x5F00; - regs.const_buffer.cb_size = DefaultCBSize; + if (debug_context) { + debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr); + } - GPUVAddr address = regs.ssbo_info.BufferAddress(); - regs.const_buffer.cb_address_high = address >> 32; - regs.const_buffer.cb_address_low = address & 0xFFFFFFFF; + if (debug_context) { + debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr); + } - regs.const_buffer.cb_pos = buffer_offset; + VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(false /*is_indexed*/); } void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { @@ -270,5 +209,95 @@ void Maxwell3D::ProcessCBData(u32 value) { regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; } +Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { + GPUVAddr tic_base_address = regs.tic.TICAddress(); + + GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry); + VAddr tic_address_cpu = memory_manager.PhysicalToVirtualAddress(tic_address_gpu); + + Texture::TICEntry tic_entry; + Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); + + ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear, + "TIC versions other than BlockLinear are unimplemented"); + + ASSERT_MSG(tic_entry.texture_type == Texture::TextureType::Texture2D, + "Texture types other than Texture2D are unimplemented"); + + auto r_type = tic_entry.r_type.Value(); + auto g_type = tic_entry.g_type.Value(); + auto b_type = tic_entry.b_type.Value(); + auto a_type = tic_entry.a_type.Value(); + + // TODO(Subv): Different data types for separate components are not supported + ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); + + return tic_entry; +} + +Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { + GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); + + GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry); + VAddr tsc_address_cpu = memory_manager.PhysicalToVirtualAddress(tsc_address_gpu); + + Texture::TSCEntry tsc_entry; + Memory::ReadBlock(tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); + return tsc_entry; +} + +std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const { + std::vector<Texture::FullTextureInfo> textures; + + auto& fragment_shader = state.shader_stages[static_cast<size_t>(stage)]; + auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index]; + ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); + + GPUVAddr tic_base_address = regs.tic.TICAddress(); + + GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size; + + // Offset into the texture constbuffer where the texture info begins. + static constexpr size_t TextureInfoOffset = 0x20; + + for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; + current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { + + Texture::TextureHandle tex_handle{ + Memory::Read32(memory_manager.PhysicalToVirtualAddress(current_texture))}; + + Texture::FullTextureInfo tex_info{}; + // TODO(Subv): Use the shader to determine which textures are actually accessed. + tex_info.index = (current_texture - tex_info_buffer.address - TextureInfoOffset) / + sizeof(Texture::TextureHandle); + + // Load the TIC data. + if (tex_handle.tic_id != 0) { + tex_info.enabled = true; + + auto tic_entry = GetTICEntry(tex_handle.tic_id); + // TODO(Subv): Workaround for BitField's move constructor being deleted. + std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry)); + } + + // Load the TSC data + if (tex_handle.tsc_id != 0) { + auto tsc_entry = GetTSCEntry(tex_handle.tsc_id); + // TODO(Subv): Workaround for BitField's move constructor being deleted. + std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry)); + } + + if (tex_info.enabled) + textures.push_back(tex_info); + } + + return textures; +} + +u32 Maxwell3D::GetRegisterValue(u32 method) const { + ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); + return regs.reg_array[method]; +} + } // namespace Engines } // namespace Tegra diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index aab282b77..98b39b2ff 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -7,10 +7,15 @@ #include <array> #include <unordered_map> #include <vector> +#include "common/assert.h" #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/math_util.h" +#include "video_core/gpu.h" +#include "video_core/macro_interpreter.h" #include "video_core/memory_manager.h" +#include "video_core/textures/texture.h" namespace Tegra { namespace Engines { @@ -20,18 +25,13 @@ public: explicit Maxwell3D(MemoryManager& memory_manager); ~Maxwell3D() = default; - /// Write the value to the register identified by method. - void WriteReg(u32 method, u32 value, u32 remaining_params); - - /// Uploads the code for a GPU macro program associated with the specified entry. - void SubmitMacroCode(u32 entry, std::vector<u32> code); - /// Register structure of the Maxwell3D engine. /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. struct Regs { static constexpr size_t NUM_REGS = 0xE36; static constexpr size_t NumRenderTargets = 8; + static constexpr size_t NumViewports = 16; static constexpr size_t NumCBData = 16; static constexpr size_t NumVertexArrays = 32; static constexpr size_t NumVertexAttributes = 32; @@ -62,6 +62,192 @@ public: Fragment = 4, }; + struct VertexAttribute { + enum class Size : u32 { + Size_32_32_32_32 = 0x01, + Size_32_32_32 = 0x02, + Size_16_16_16_16 = 0x03, + Size_32_32 = 0x04, + Size_16_16_16 = 0x05, + Size_8_8_8_8 = 0x0a, + Size_16_16 = 0x0f, + Size_32 = 0x12, + Size_8_8_8 = 0x13, + Size_8_8 = 0x18, + Size_16 = 0x1b, + Size_8 = 0x1d, + Size_10_10_10_2 = 0x30, + Size_11_11_10 = 0x31, + }; + + enum class Type : u32 { + SignedNorm = 1, + UnsignedNorm = 2, + SignedInt = 3, + UnsignedInt = 4, + UnsignedScaled = 5, + SignedScaled = 6, + Float = 7, + }; + + union { + BitField<0, 5, u32> buffer; + BitField<6, 1, u32> constant; + BitField<7, 14, u32> offset; + BitField<21, 6, Size> size; + BitField<27, 3, Type> type; + BitField<31, 1, u32> bgra; + }; + + u32 ComponentCount() const { + switch (size) { + case Size::Size_32_32_32_32: + return 4; + case Size::Size_32_32_32: + return 3; + case Size::Size_16_16_16_16: + return 4; + case Size::Size_32_32: + return 2; + case Size::Size_16_16_16: + return 3; + case Size::Size_8_8_8_8: + return 4; + case Size::Size_16_16: + return 2; + case Size::Size_32: + return 1; + case Size::Size_8_8_8: + return 3; + case Size::Size_8_8: + return 2; + case Size::Size_16: + return 1; + case Size::Size_8: + return 1; + case Size::Size_10_10_10_2: + return 4; + case Size::Size_11_11_10: + return 3; + default: + UNREACHABLE(); + } + } + + u32 SizeInBytes() const { + switch (size) { + case Size::Size_32_32_32_32: + return 16; + case Size::Size_32_32_32: + return 12; + case Size::Size_16_16_16_16: + return 8; + case Size::Size_32_32: + return 8; + case Size::Size_16_16_16: + return 6; + case Size::Size_8_8_8_8: + return 4; + case Size::Size_16_16: + return 4; + case Size::Size_32: + return 4; + case Size::Size_8_8_8: + return 3; + case Size::Size_8_8: + return 2; + case Size::Size_16: + return 2; + case Size::Size_8: + return 1; + case Size::Size_10_10_10_2: + return 4; + case Size::Size_11_11_10: + return 4; + default: + UNREACHABLE(); + } + } + + std::string SizeString() const { + switch (size) { + case Size::Size_32_32_32_32: + return "32_32_32_32"; + case Size::Size_32_32_32: + return "32_32_32"; + case Size::Size_16_16_16_16: + return "16_16_16_16"; + case Size::Size_32_32: + return "32_32"; + case Size::Size_16_16_16: + return "16_16_16"; + case Size::Size_8_8_8_8: + return "8_8_8_8"; + case Size::Size_16_16: + return "16_16"; + case Size::Size_32: + return "32"; + case Size::Size_8_8_8: + return "8_8_8"; + case Size::Size_8_8: + return "8_8"; + case Size::Size_16: + return "16"; + case Size::Size_8: + return "8"; + case Size::Size_10_10_10_2: + return "10_10_10_2"; + case Size::Size_11_11_10: + return "11_11_10"; + } + UNREACHABLE(); + return {}; + } + + std::string TypeString() const { + switch (type) { + case Type::SignedNorm: + return "SNORM"; + case Type::UnsignedNorm: + return "UNORM"; + case Type::SignedInt: + return "SINT"; + case Type::UnsignedInt: + return "UINT"; + case Type::UnsignedScaled: + return "USCALED"; + case Type::SignedScaled: + return "SSCALED"; + case Type::Float: + return "FLOAT"; + } + UNREACHABLE(); + return {}; + } + + bool IsNormalized() const { + return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); + } + }; + + enum class PrimitiveTopology : u32 { + Points = 0x0, + Lines = 0x1, + LineLoop = 0x2, + LineStrip = 0x3, + Triangles = 0x4, + TriangleStrip = 0x5, + TriangleFan = 0x6, + Quads = 0x7, + QuadStrip = 0x8, + Polygon = 0x9, + LinesAdjacency = 0xa, + LineStripAdjacency = 0xb, + TrianglesAdjacency = 0xc, + TriangleStripAdjacency = 0xd, + Patches = 0xe, + }; + union { struct { INSERT_PADDING_WORDS(0x200); @@ -69,9 +255,9 @@ public: struct { u32 address_high; u32 address_low; - u32 horiz; - u32 vert; - u32 format; + u32 width; + u32 height; + Tegra::RenderTargetFormat format; u32 block_dimensions; u32 array_mode; u32 layer_stride; @@ -84,7 +270,31 @@ public: } } rt[NumRenderTargets]; - INSERT_PADDING_WORDS(0xDD); + INSERT_PADDING_WORDS(0x80); + + struct { + union { + BitField<0, 16, u32> x; + BitField<16, 16, u32> width; + }; + union { + BitField<0, 16, u32> y; + BitField<16, 16, u32> height; + }; + float depth_range_near; + float depth_range_far; + + MathUtil::Rectangle<s32> GetRect() const { + return { + static_cast<s32>(x), // left + static_cast<s32>(y + height), // top + static_cast<s32>(x + width), // right + static_cast<s32>(y) // bottom + }; + }; + } viewport[NumViewports]; + + INSERT_PADDING_WORDS(0x1D); struct { u32 first; @@ -108,14 +318,7 @@ public: INSERT_PADDING_WORDS(0x5B); - union { - BitField<0, 5, u32> buffer; - BitField<6, 1, u32> constant; - BitField<7, 14, u32> offset; - BitField<21, 6, u32> size; - BitField<27, 3, u32> type; - BitField<31, 1, u32> bgra; - } vertex_attrib_format[NumVertexAttributes]; + VertexAttribute vertex_attrib_format[NumVertexAttributes]; INSERT_PADDING_WORDS(0xF); @@ -163,13 +366,15 @@ public: } } code_address; INSERT_PADDING_WORDS(1); + struct { u32 vertex_end_gl; union { u32 vertex_begin_gl; - BitField<0, 16, u32> topology; + BitField<0, 16, PrimitiveTopology> topology; }; } draw; + INSERT_PADDING_WORDS(0x139); struct { u32 query_address_high; @@ -294,22 +499,27 @@ public: bool enabled; }; - struct ShaderProgramInfo { - Regs::ShaderStage stage; - Regs::ShaderProgram program; - GPUVAddr address; - }; - struct ShaderStageInfo { std::array<ConstBufferInfo, Regs::MaxConstBuffers> const_buffers; }; std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages; - std::array<ShaderProgramInfo, Regs::MaxShaderProgram> shader_programs; }; State state{}; + /// Reads a register value located at the input method address + u32 GetRegisterValue(u32 method) const; + + /// Write the value to the register identified by method. + void WriteReg(u32 method, u32 value, u32 remaining_params); + + /// Uploads the code for a GPU macro program associated with the specified entry. + void SubmitMacroCode(u32 entry, std::vector<u32> code); + + /// Returns a list of enabled textures for the specified shader stage. + std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; + private: MemoryManager& memory_manager; @@ -320,12 +530,21 @@ private: /// Parameters that have been submitted to the macro call so far. std::vector<u32> macro_params; + /// Interpreter for the macro codes uploaded to the GPU. + MacroInterpreter macro_interpreter; + + /// Retrieves information about a specific TIC entry from the TIC buffer. + Texture::TICEntry GetTICEntry(u32 tic_index) const; + + /// Retrieves information about a specific TSC entry from the TSC buffer. + Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; + /** * Call a macro on this engine. * @param method Method to call * @param parameters Arguments to the method call */ - void CallMacroMethod(u32 method, const std::vector<u32>& parameters); + void CallMacroMethod(u32 method, std::vector<u32> parameters); /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); @@ -338,19 +557,6 @@ private: /// Handles a write to the VERTEX_END_GL register, triggering a draw. void DrawArrays(); - - /// Method call handlers - void BindTextureInfoBuffer(const std::vector<u32>& parameters); - void SetShader(const std::vector<u32>& parameters); - void BindStorageBuffer(const std::vector<u32>& parameters); - - struct MethodInfo { - const char* name; - u32 arguments; - void (Maxwell3D::*handler)(const std::vector<u32>& parameters); - }; - - static const std::unordered_map<u32, MethodInfo> method_handlers; }; #define ASSERT_REG_POSITION(field_name, position) \ @@ -358,6 +564,7 @@ private: "Field " #field_name " has invalid position") ASSERT_REG_POSITION(rt, 0x200); +ASSERT_REG_POSITION(viewport, 0x300); ASSERT_REG_POSITION(vertex_buffer, 0x35D); ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index c384d236e..9463cd5d6 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -18,4 +18,8 @@ GPU::GPU() { GPU::~GPU() = default; +const Tegra::Engines::Maxwell3D& GPU::Get3DEngine() const { + return *maxwell_3d; +} + } // namespace Tegra diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 2a9064ba3..71a8661b4 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -8,10 +8,49 @@ #include <unordered_map> #include <vector> #include "common/common_types.h" +#include "core/hle/service/nvflinger/buffer_queue.h" #include "video_core/memory_manager.h" namespace Tegra { +enum class RenderTargetFormat : u32 { + NONE = 0x0, + RGBA8_UNORM = 0xD5, +}; + +class DebugContext; + +/** + * Struct describing framebuffer configuration + */ +struct FramebufferConfig { + enum class PixelFormat : u32 { + ABGR8 = 1, + }; + + /** + * Returns the number of bytes per pixel. + */ + static u32 BytesPerPixel(PixelFormat format) { + switch (format) { + case PixelFormat::ABGR8: + return 4; + } + + UNREACHABLE(); + } + + VAddr address; + u32 offset; + u32 width; + u32 height; + u32 stride; + PixelFormat pixel_format; + + using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; + TransformFlags transform_flags; +}; + namespace Engines { class Fermi2D; class Maxwell3D; @@ -34,8 +73,15 @@ public: /// Processes a command list stored at the specified address in GPU memory. void ProcessCommandList(GPUVAddr address, u32 size); + /// Returns a reference to the Maxwell3D GPU engine. + const Engines::Maxwell3D& Get3DEngine() const; + std::unique_ptr<MemoryManager> memory_manager; + Engines::Maxwell3D& Maxwell3D() { + return *maxwell_3d; + } + private: static constexpr u32 InvalidGraphMacroEntry = 0xFFFFFFFF; diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp new file mode 100644 index 000000000..993a67746 --- /dev/null +++ b/src/video_core/macro_interpreter.cpp @@ -0,0 +1,257 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/macro_interpreter.h" + +namespace Tegra { + +MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} + +void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> parameters) { + Reset(); + registers[1] = parameters[0]; + this->parameters = std::move(parameters); + + // Execute the code until we hit an exit condition. + bool keep_executing = true; + while (keep_executing) { + keep_executing = Step(code, false); + } + + // Assert the the macro used all the input parameters + ASSERT(next_parameter_index == this->parameters.size()); +} + +void MacroInterpreter::Reset() { + registers = {}; + pc = 0; + delayed_pc = boost::none; + method_address.raw = 0; + parameters.clear(); + // The next parameter index starts at 1, because $r1 already has the value of the first + // parameter. + next_parameter_index = 1; +} + +bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) { + u32 base_address = pc; + + Opcode opcode = GetOpcode(code); + pc += 4; + + // Update the program counter if we were delayed + if (delayed_pc != boost::none) { + ASSERT(is_delay_slot); + pc = *delayed_pc; + delayed_pc = boost::none; + } + + switch (opcode.operation) { + case Operation::ALU: { + u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), + GetRegister(opcode.src_b)); + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Operation::AddImmediate: { + ProcessResult(opcode.result_operation, opcode.dst, + GetRegister(opcode.src_a) + opcode.immediate); + break; + } + case Operation::ExtractInsert: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask(); + dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); + dst |= src << opcode.bf_dst_bit; + ProcessResult(opcode.result_operation, opcode.dst, dst); + break; + } + case Operation::ExtractShiftLeftImmediate: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit; + + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Operation::ExtractShiftLeftRegister: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst; + + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Operation::Read: { + u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Operation::Branch: { + ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); + u32 value = GetRegister(opcode.src_a); + bool taken = EvaluateBranchCondition(opcode.branch_condition, value); + if (taken) { + // Ignore the delay slot if the branch has the annul bit. + if (opcode.branch_annul) { + pc = base_address + (opcode.immediate << 2); + return true; + } + + delayed_pc = base_address + (opcode.immediate << 2); + // Execute one more instruction due to the delay slot. + return Step(code, true); + } + break; + } + default: + UNIMPLEMENTED_MSG("Unimplemented macro operation %u", + static_cast<u32>(opcode.operation.Value())); + } + + if (opcode.is_exit) { + // Exit has a delay slot, execute the next instruction + // Note: Executing an exit during a branch delay slot will cause the instruction at the + // branch target to be executed before exiting. + Step(code, true); + return false; + } + + return true; +} + +MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector<u32>& code) const { + ASSERT((pc % sizeof(u32)) == 0); + ASSERT(pc < code.size() * sizeof(u32)); + return {code[pc / sizeof(u32)]}; +} + +u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const { + switch (operation) { + case ALUOperation::Add: + return src_a + src_b; + // TODO(Subv): Implement AddWithCarry + case ALUOperation::Subtract: + return src_a - src_b; + // TODO(Subv): Implement SubtractWithBorrow + case ALUOperation::Xor: + return src_a ^ src_b; + case ALUOperation::Or: + return src_a | src_b; + case ALUOperation::And: + return src_a & src_b; + case ALUOperation::AndNot: + return src_a & ~src_b; + case ALUOperation::Nand: + return ~(src_a & src_b); + + default: + UNIMPLEMENTED_MSG("Unimplemented ALU operation %u", static_cast<u32>(operation)); + } +} + +void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 result) { + switch (operation) { + case ResultOperation::IgnoreAndFetch: + // Fetch parameter and ignore result. + SetRegister(reg, FetchParameter()); + break; + case ResultOperation::Move: + // Move result. + SetRegister(reg, result); + break; + case ResultOperation::MoveAndSetMethod: + // Move result and use as Method Address. + SetRegister(reg, result); + SetMethodAddress(result); + break; + case ResultOperation::FetchAndSend: + // Fetch parameter and send result. + SetRegister(reg, FetchParameter()); + Send(result); + break; + case ResultOperation::MoveAndSend: + // Move and send result. + SetRegister(reg, result); + Send(result); + break; + case ResultOperation::FetchAndSetMethod: + // Fetch parameter and use result as Method Address. + SetRegister(reg, FetchParameter()); + SetMethodAddress(result); + break; + case ResultOperation::MoveAndSetMethodFetchAndSend: + // Move result and use as Method Address, then fetch and send parameter. + SetRegister(reg, result); + SetMethodAddress(result); + Send(FetchParameter()); + break; + case ResultOperation::MoveAndSetMethodSend: + // Move result and use as Method Address, then send bits 12:17 of result. + SetRegister(reg, result); + SetMethodAddress(result); + Send((result >> 12) & 0b111111); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented result operation %u", static_cast<u32>(operation)); + } +} + +u32 MacroInterpreter::FetchParameter() { + ASSERT(next_parameter_index < parameters.size()); + return parameters[next_parameter_index++]; +} + +u32 MacroInterpreter::GetRegister(u32 register_id) const { + // Register 0 is supposed to always return 0. + if (register_id == 0) + return 0; + + ASSERT(register_id < registers.size()); + return registers[register_id]; +} + +void MacroInterpreter::SetRegister(u32 register_id, u32 value) { + // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero + // register. + if (register_id == 0) + return; + + ASSERT(register_id < registers.size()); + registers[register_id] = value; +} + +void MacroInterpreter::SetMethodAddress(u32 address) { + method_address.raw = address; +} + +void MacroInterpreter::Send(u32 value) { + maxwell3d.WriteReg(method_address.address, value, 0); + // Increment the method address by the method increment. + method_address.address.Assign(method_address.address.Value() + + method_address.increment.Value()); +} + +u32 MacroInterpreter::Read(u32 method) const { + return maxwell3d.GetRegisterValue(method); +} + +bool MacroInterpreter::EvaluateBranchCondition(BranchCondition cond, u32 value) const { + switch (cond) { + case BranchCondition::Zero: + return value == 0; + case BranchCondition::NotZero: + return value != 0; + } + UNREACHABLE(); +} + +} // namespace Tegra diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h new file mode 100644 index 000000000..a71e359d8 --- /dev/null +++ b/src/video_core/macro_interpreter.h @@ -0,0 +1,164 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <vector> +#include <boost/optional.hpp> +#include "common/bit_field.h" +#include "common/common_types.h" + +namespace Tegra { +namespace Engines { +class Maxwell3D; +} + +class MacroInterpreter final { +public: + explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d); + + /** + * Executes the macro code with the specified input parameters. + * @param code The macro byte code to execute + * @param parameters The parameters of the macro + */ + void Execute(const std::vector<u32>& code, std::vector<u32> parameters); + +private: + enum class Operation : u32 { + ALU = 0, + AddImmediate = 1, + ExtractInsert = 2, + ExtractShiftLeftImmediate = 3, + ExtractShiftLeftRegister = 4, + Read = 5, + Unused = 6, // This operation doesn't seem to be a valid encoding. + Branch = 7, + }; + + enum class ALUOperation : u32 { + Add = 0, + AddWithCarry = 1, + Subtract = 2, + SubtractWithBorrow = 3, + // Operations 4-7 don't seem to be valid encodings. + Xor = 8, + Or = 9, + And = 10, + AndNot = 11, + Nand = 12 + }; + + enum class ResultOperation : u32 { + IgnoreAndFetch = 0, + Move = 1, + MoveAndSetMethod = 2, + FetchAndSend = 3, + MoveAndSend = 4, + FetchAndSetMethod = 5, + MoveAndSetMethodFetchAndSend = 6, + MoveAndSetMethodSend = 7 + }; + + enum class BranchCondition : u32 { + Zero = 0, + NotZero = 1, + }; + + union Opcode { + u32 raw; + BitField<0, 3, Operation> operation; + BitField<4, 3, ResultOperation> result_operation; + BitField<4, 1, BranchCondition> branch_condition; + BitField<5, 1, u32> + branch_annul; // If set on a branch, then the branch doesn't have a delay slot. + BitField<7, 1, u32> is_exit; + BitField<8, 3, u32> dst; + BitField<11, 3, u32> src_a; + BitField<14, 3, u32> src_b; + // The signed immediate overlaps the second source operand and the alu operation. + BitField<14, 18, s32> immediate; + + BitField<17, 5, ALUOperation> alu_operation; + + // Bitfield instructions data + BitField<17, 5, u32> bf_src_bit; + BitField<22, 5, u32> bf_size; + BitField<27, 5, u32> bf_dst_bit; + + u32 GetBitfieldMask() const { + return (1 << bf_size) - 1; + } + }; + + union MethodAddress { + u32 raw; + BitField<0, 12, u32> address; + BitField<12, 6, u32> increment; + }; + + /// Resets the execution engine state, zeroing registers, etc. + void Reset(); + + /** + * Executes a single macro instruction located at the current program counter. Returns whether + * the interpreter should keep running. + * @param code The macro code to execute. + * @param is_delay_slot Whether the current step is being executed due to a delay slot in a + * previous instruction. + */ + bool Step(const std::vector<u32>& code, bool is_delay_slot); + + /// Calculates the result of an ALU operation. src_a OP src_b; + u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const; + + /// Performs the result operation on the input result and stores it in the specified register + /// (if necessary). + void ProcessResult(ResultOperation operation, u32 reg, u32 result); + + /// Evaluates the branch condition and returns whether the branch should be taken or not. + bool EvaluateBranchCondition(BranchCondition cond, u32 value) const; + + /// Reads an opcode at the current program counter location. + Opcode GetOpcode(const std::vector<u32>& code) const; + + /// Returns the specified register's value. Register 0 is hardcoded to always return 0. + u32 GetRegister(u32 register_id) const; + + /// Sets the register to the input value. + void SetRegister(u32 register_id, u32 value); + + /// Sets the method address to use for the next Send instruction. + void SetMethodAddress(u32 address); + + /// Calls a GPU Engine method with the input parameter. + void Send(u32 value); + + /// Reads a GPU register located at the method address. + u32 Read(u32 method) const; + + /// Returns the next parameter in the parameter queue. + u32 FetchParameter(); + + Engines::Maxwell3D& maxwell3d; + + u32 pc; ///< Current program counter + boost::optional<u32> + delayed_pc; ///< Program counter to execute at after the delay slot is executed. + + static constexpr size_t NumMacroRegisters = 8; + + /// General purpose macro registers. + std::array<u32, NumMacroRegisters> registers = {}; + + /// Method address to use for the next Send instruction. + MethodAddress method_address = {}; + + /// Input parameters of the current macro. + std::vector<u32> parameters; + /// Index of the next parameter that will be fetched by the 'parm' instruction. + u32 next_parameter_index = 0; +}; +} // namespace Tegra diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 6c7bd0826..8239f9aad 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "video_core/gpu.h" struct ScreenInfo; @@ -14,8 +15,8 @@ class RasterizerInterface { public: virtual ~RasterizerInterface() {} - /// Draw the current batch of triangles - virtual void DrawTriangles() = 0; + /// Draw the current batch of vertex arrays + virtual void DrawArrays() = 0; /// Notify rasterizer that the specified Maxwell register has been changed virtual void NotifyMaxwellRegisterChanged(u32 id) = 0; @@ -24,14 +25,14 @@ public: virtual void FlushAll() = 0; /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory - virtual void FlushRegion(PAddr addr, u32 size) = 0; + virtual void FlushRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - virtual void InvalidateRegion(PAddr addr, u32 size) = 0; + virtual void InvalidateRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory /// and invalidated - virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; + virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 virtual bool AccelerateDisplayTransfer(const void* config) { @@ -49,7 +50,8 @@ public: } /// Attempt to use a faster method to display the framebuffer to screen - virtual bool AccelerateDisplay(const void* config, PAddr framebuffer_addr, u32 pixel_stride, + virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, + VAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { return false; } diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 51e1d45f9..30075b23c 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -5,6 +5,11 @@ #include <atomic> #include <memory> #include "video_core/renderer_base.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/video_core.h" -void RendererBase::RefreshRasterizerSetting() {} +void RendererBase::RefreshRasterizerSetting() { + if (rasterizer == nullptr) { + rasterizer = std::make_unique<RasterizerOpenGL>(); + } +} diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 2aba50eda..89a960eaf 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -8,6 +8,8 @@ #include <boost/optional.hpp> #include "common/assert.h" #include "common/common_types.h" +#include "video_core/gpu.h" +#include "video_core/rasterizer_interface.h" class EmuWindow; @@ -16,40 +18,10 @@ public: /// Used to reference a framebuffer enum kFramebuffer { kFramebuffer_VirtualXFB = 0, kFramebuffer_EFB, kFramebuffer_Texture }; - /** - * Struct describing framebuffer metadata - * TODO(bunnei): This struct belongs in the GPU code, but we don't have a good place for it yet. - */ - struct FramebufferInfo { - enum class PixelFormat : u32 { - ABGR8 = 1, - }; - - /** - * Returns the number of bytes per pixel. - */ - static u32 BytesPerPixel(PixelFormat format) { - switch (format) { - case PixelFormat::ABGR8: - return 4; - } - - UNREACHABLE(); - } - - VAddr address; - u32 offset; - u32 width; - u32 height; - u32 stride; - PixelFormat pixel_format; - bool flip_vertical; - }; - virtual ~RendererBase() {} /// Swap buffers (render frame) - virtual void SwapBuffers(boost::optional<const FramebufferInfo&> framebuffer_info) = 0; + virtual void SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) = 0; /** * Set the emulator window to use for renderer @@ -74,12 +46,16 @@ public: return m_current_frame; } + VideoCore::RasterizerInterface* Rasterizer() const { + return rasterizer.get(); + } + void RefreshRasterizerSetting(); protected: + std::unique_ptr<VideoCore::RasterizerInterface> rasterizer; f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer int m_current_frame = 0; ///< Current frame, should be set by the renderer private: - bool opengl_rasterizer_active = false; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 24cfff229..911890f16 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -14,11 +14,16 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/vector_math.h" +#include "core/core.h" +#include "core/hle/kernel/process.h" #include "core/settings.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" +using Maxwell = Tegra::Engines::Maxwell3D::Regs; using PixelFormat = SurfaceParams::PixelFormat; using SurfaceType = SurfaceParams::SurfaceType; @@ -54,6 +59,8 @@ static void SetShaderUniformBlockBindings(GLuint shader) { } RasterizerOpenGL::RasterizerOpenGL() { + shader_dirty = true; + has_ARB_buffer_storage = false; has_ARB_direct_state_access = false; has_ARB_separate_shader_objects = false; @@ -106,8 +113,6 @@ RasterizerOpenGL::RasterizerOpenGL() { state.draw.vertex_buffer = stream_buffer->GetHandle(); pipeline.Create(); - vs_input_index_min = 0; - vs_input_index_max = 0; state.draw.program_pipeline = pipeline.handle; state.draw.shader_program = 0; state.draw.vertex_array = hw_vao.handle; @@ -120,20 +125,14 @@ RasterizerOpenGL::RasterizerOpenGL() { glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY); glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle); } else { - UNIMPLEMENTED(); + UNREACHABLE(); } accelerate_draw = AccelDraw::Disabled; glEnable(GL_BLEND); - // Sync fixed function OpenGL state - SyncClipEnabled(); - SyncClipCoef(); - SyncCullMode(); - SyncBlendEnabled(); - SyncBlendFuncs(); - SyncBlendColor(); + LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); } RasterizerOpenGL::~RasterizerOpenGL() { @@ -144,47 +143,235 @@ RasterizerOpenGL::~RasterizerOpenGL() { } } -static constexpr std::array<GLenum, 4> vs_attrib_types{ - GL_BYTE, // VertexAttributeFormat::BYTE - GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE - GL_SHORT, // VertexAttributeFormat::SHORT - GL_FLOAT // VertexAttributeFormat::FLOAT -}; - void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) { - UNIMPLEMENTED(); + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + if (is_indexed) { + UNREACHABLE(); + } + + // TODO(bunnei): Add support for 1+ vertex arrays + vs_input_size = regs.vertex_buffer.count * regs.vertex_array[0].stride; } void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_VAO); - UNIMPLEMENTED(); + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; + + state.draw.vertex_array = hw_vao.handle; + state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.Apply(); + + // TODO(bunnei): Add support for 1+ vertex arrays + const auto& vertex_array{regs.vertex_array[0]}; + ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?"); + ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!"); + for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) { + ASSERT_MSG(!regs.vertex_array[index].enable, "vertex array %d is unimplemented!", index); + } + + // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. + // Enables the first 16 vertex attributes always, as we don't know which ones are actually used + // until shader time. Note, Tegra technically supports 32, but we're cappinig this to 16 for now + // to avoid OpenGL errors. + for (unsigned index = 0; index < 16; ++index) { + auto& attrib = regs.vertex_attrib_format[index]; + glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), + attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride, + reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset)); + glEnableVertexAttribArray(index); + hw_vao_enabled_attributes[index] = true; + } + + // Copy vertex array data + const u32 data_size{vertex_array.stride * regs.vertex_buffer.count}; + const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())}; + res_cache.FlushRegion(data_addr, data_size, nullptr); + Memory::ReadBlock(data_addr, array_ptr, data_size); + + array_ptr += data_size; + buffer_offset += data_size; } void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_VS); - UNIMPLEMENTED(); + LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_shader->shader.handle); } void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_FS); - UNIMPLEMENTED(); + UNREACHABLE(); } bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { if (!has_ARB_separate_shader_objects) { - UNIMPLEMENTED(); + UNREACHABLE(); return false; } accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; - DrawTriangles(); + DrawArrays(); return true; } -void RasterizerOpenGL::DrawTriangles() { +void RasterizerOpenGL::DrawArrays() { + if (accelerate_draw == AccelDraw::Disabled) + return; + MICROPROFILE_SCOPE(OpenGL_Drawing); - UNIMPLEMENTED(); + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + // TODO(bunnei): Implement these + const bool has_stencil = false; + const bool using_color_fb = true; + const bool using_depth_fb = false; + const MathUtil::Rectangle<s32> viewport_rect{regs.viewport[0].GetRect()}; + + const bool write_color_fb = + state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || + state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; + + const bool write_depth_fb = + (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) || + (has_stencil && state.stencil.test_enabled && state.stencil.write_mask != 0); + + Surface color_surface; + Surface depth_surface; + MathUtil::Rectangle<u32> surfaces_rect; + std::tie(color_surface, depth_surface, surfaces_rect) = + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); + + const u16 res_scale = color_surface != nullptr + ? color_surface->res_scale + : (depth_surface == nullptr ? 1u : depth_surface->res_scale); + + MathUtil::Rectangle<u32> draw_rect{ + static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.left) + + viewport_rect.left * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Left + static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + + viewport_rect.top * res_scale, + surfaces_rect.bottom, surfaces_rect.top)), // Top + static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.left) + + viewport_rect.right * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Right + static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + + viewport_rect.bottom * res_scale, + surfaces_rect.bottom, surfaces_rect.top))}; // Bottom + + // Bind the framebuffer surfaces + BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); + + // Sync the viewport + SyncViewport(surfaces_rect, res_scale); + + // TODO(bunnei): Sync framebuffer_scale uniform here + // TODO(bunnei): Sync scissorbox uniform(s) here + // TODO(bunnei): Sync and bind the texture surfaces + + // Sync and bind the shader + if (shader_dirty) { + SetShader(); + shader_dirty = false; + } + + // Sync the uniform data + if (uniform_block_data.dirty) { + glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(UniformData), &uniform_block_data.data); + uniform_block_data.dirty = false; + } + + // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable + // scissor test to prevent drawing outside of the framebuffer region + state.scissor.enabled = true; + state.scissor.x = draw_rect.left; + state.scissor.y = draw_rect.bottom; + state.scissor.width = draw_rect.GetWidth(); + state.scissor.height = draw_rect.GetHeight(); + state.Apply(); + + // Draw the vertex batch + const bool is_indexed = accelerate_draw == AccelDraw::Indexed; + AnalyzeVertexArray(is_indexed); + state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.Apply(); + + size_t buffer_size = static_cast<size_t>(vs_input_size); + if (is_indexed) { + UNREACHABLE(); + } + buffer_size += sizeof(VSUniformData); + + size_t ptr_pos = 0; + u8* buffer_ptr; + GLintptr buffer_offset; + std::tie(buffer_ptr, buffer_offset) = + stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); + + SetupVertexArray(buffer_ptr, buffer_offset); + ptr_pos += vs_input_size; + + GLintptr index_buffer_offset = 0; + if (is_indexed) { + UNREACHABLE(); + } + + SetupVertexShader(reinterpret_cast<VSUniformData*>(&buffer_ptr[ptr_pos]), + buffer_offset + static_cast<GLintptr>(ptr_pos)); + const GLintptr vs_ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); + ptr_pos += sizeof(VSUniformData); + + stream_buffer->Unmap(); + + const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { + if (has_ARB_direct_state_access) { + glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); + } else { + glBindBuffer(GL_COPY_WRITE_BUFFER, handle); + glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); + } + }; + + copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(VSUniformData)); + + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_shader->shader.handle); + + if (is_indexed) { + UNREACHABLE(); + } else { + glDrawArrays(MaxwellToGL::PrimitiveTopology(regs.draw.topology), 0, + regs.vertex_buffer.count); + } + + // Disable scissor test + state.scissor.enabled = false; + + accelerate_draw = AccelDraw::Disabled; + + // Unbind textures for potential future use as framebuffer attachments + for (auto& texture_unit : state.texture_units) { + texture_unit.texture_2d = 0; + } + state.Apply(); + + // Mark framebuffer surfaces as dirty + MathUtil::Rectangle<u32> draw_rect_unscaled{ + draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale, + draw_rect.bottom / res_scale}; + + if (color_surface != nullptr && write_color_fb) { + auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), + color_surface); + } + if (depth_surface != nullptr && write_depth_fb) { + auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), + depth_surface); + } } void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 id) {} @@ -194,17 +381,17 @@ void RasterizerOpenGL::FlushAll() { res_cache.FlushAll(); } -void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { +void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); } -void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { +void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.InvalidateRegion(addr, size, nullptr); } -void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { +void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); res_cache.InvalidateRegion(addr, size, nullptr); @@ -212,58 +399,180 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { MICROPROFILE_SCOPE(OpenGL_Blits); - UNIMPLEMENTED(); + UNREACHABLE(); return true; } bool RasterizerOpenGL::AccelerateTextureCopy(const void* config) { - UNIMPLEMENTED(); + UNREACHABLE(); return true; } bool RasterizerOpenGL::AccelerateFill(const void* config) { - UNIMPLEMENTED(); + UNREACHABLE(); return true; } -bool RasterizerOpenGL::AccelerateDisplay(const void* config, PAddr framebuffer_addr, - u32 pixel_stride, ScreenInfo& screen_info) { - UNIMPLEMENTED(); +bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, + VAddr framebuffer_addr, u32 pixel_stride, + ScreenInfo& screen_info) { + if (framebuffer_addr == 0) { + return false; + } + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + + SurfaceParams src_params; + src_params.addr = framebuffer_addr; + src_params.width = std::min(framebuffer.width, pixel_stride); + src_params.height = framebuffer.height; + src_params.stride = pixel_stride; + src_params.is_tiled = false; + src_params.pixel_format = + SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); + src_params.UpdateParams(); + + MathUtil::Rectangle<u32> src_rect; + Surface src_surface; + std::tie(src_surface, src_rect) = + res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); + + if (src_surface == nullptr) { + return false; + } + + u32 scaled_width = src_surface->GetScaledWidth(); + u32 scaled_height = src_surface->GetScaledHeight(); + + screen_info.display_texcoords = MathUtil::Rectangle<float>( + (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, + (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); + + screen_info.display_texture = src_surface->texture.handle; + return true; } void RasterizerOpenGL::SetShader() { - UNIMPLEMENTED(); + // TODO(bunnei): The below sets up a static test shader for passing untransformed vertices to + // OpenGL for rendering. This should be removed/replaced when we start emulating Maxwell + // shaders. + + static constexpr char vertex_shader[] = R"( +#version 150 core + +in vec2 vert_position; +in vec2 vert_tex_coord; +out vec2 frag_tex_coord; + +void main() { + // Multiply input position by the rotscale part of the matrix and then manually translate by + // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector + // to `vec3(vert_position.xy, 1.0)` + gl_Position = vec4(mat2(mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)) * vert_position + mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)[2], 0.0, 1.0); + frag_tex_coord = vert_tex_coord; +} +)"; + + static constexpr char fragment_shader[] = R"( +#version 150 core + +in vec2 frag_tex_coord; +out vec4 color; + +uniform sampler2D color_texture; + +void main() { + color = vec4(1.0, 0.0, 1.0, 0.0); +} +)"; + + if (current_shader) { + return; + } + + LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); + + current_shader = &test_shader; + if (has_ARB_separate_shader_objects) { + test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true); + glActiveShaderProgram(pipeline.handle, test_shader.shader.handle); + } else { + UNREACHABLE(); + } + + state.draw.shader_program = test_shader.shader.handle; + state.Apply(); + + if (has_ARB_separate_shader_objects) { + state.draw.shader_program = 0; + state.Apply(); + } +} + +void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, + const Surface& depth_surface, bool has_stencil) { + state.draw.draw_framebuffer = framebuffer.handle; + state.Apply(); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + color_surface != nullptr ? color_surface->texture.handle : 0, 0); + if (depth_surface != nullptr) { + if (has_stencil) { + // attach both depth and stencil + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + depth_surface->texture.handle, 0); + } else { + // attach depth + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + depth_surface->texture.handle, 0); + // clear stencil attachment + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + } + } else { + // clear both depth and stencil attachment + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + } +} + +void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale) { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + const MathUtil::Rectangle<s32> viewport_rect{regs.viewport[0].GetRect()}; + + state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left * res_scale; + state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale; + state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth() * res_scale); + state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight() * res_scale); } void RasterizerOpenGL::SyncClipEnabled() { - UNIMPLEMENTED(); + UNREACHABLE(); } void RasterizerOpenGL::SyncClipCoef() { - UNIMPLEMENTED(); + UNREACHABLE(); } void RasterizerOpenGL::SyncCullMode() { - UNIMPLEMENTED(); + UNREACHABLE(); } void RasterizerOpenGL::SyncDepthScale() { - UNIMPLEMENTED(); + UNREACHABLE(); } void RasterizerOpenGL::SyncDepthOffset() { - UNIMPLEMENTED(); + UNREACHABLE(); } void RasterizerOpenGL::SyncBlendEnabled() { - UNIMPLEMENTED(); + UNREACHABLE(); } void RasterizerOpenGL::SyncBlendFuncs() { - UNIMPLEMENTED(); + UNREACHABLE(); } void RasterizerOpenGL::SyncBlendColor() { - UNIMPLEMENTED(); + UNREACHABLE(); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 893fc530f..fd53e94cd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -29,19 +29,25 @@ public: RasterizerOpenGL(); ~RasterizerOpenGL() override; - void DrawTriangles() override; + void DrawArrays() override; void NotifyMaxwellRegisterChanged(u32 id) override; void FlushAll() override; - void FlushRegion(PAddr addr, u32 size) override; - void InvalidateRegion(PAddr addr, u32 size) override; - void FlushAndInvalidateRegion(PAddr addr, u32 size) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; bool AccelerateDisplayTransfer(const void* config) override; bool AccelerateTextureCopy(const void* config) override; bool AccelerateFill(const void* config) override; - bool AccelerateDisplay(const void* config, PAddr framebuffer_addr, u32 pixel_stride, - ScreenInfo& screen_info) override; + bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, VAddr framebuffer_addr, + u32 pixel_stride, ScreenInfo& screen_info) override; bool AccelerateDrawBatch(bool is_indexed) override; + /// OpenGL shader generated for a given Maxwell register state + struct MaxwellShader { + /// OpenGL shader resource + OGLShader shader; + }; + struct VertexShader { OGLShader shader; }; @@ -81,6 +87,13 @@ public: private: struct SamplerInfo {}; + /// Binds the framebuffer color and depth surface + void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface, + bool has_stencil); + + /// Syncs the viewport to match the guest state + void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale); + /// Syncs the clip enabled status to match the guest state void SyncClipEnabled(); @@ -117,6 +130,12 @@ private: RasterizerCacheOpenGL res_cache; + /// Shader used for test renderering - to be removed once we have emulated shaders + MaxwellShader test_shader{}; + + const MaxwellShader* current_shader{}; + bool shader_dirty{}; + struct { UniformData data; bool dirty; @@ -127,7 +146,7 @@ private: OGLVertexArray hw_vao; std::array<bool, 16> hw_vao_enabled_attributes; - std::array<SamplerInfo, 3> texture_samplers; + std::array<SamplerInfo, 32> texture_samplers; static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; std::unique_ptr<OGLStreamBuffer> vertex_buffer; OGLBuffer uniform_buffer; @@ -136,8 +155,6 @@ private: static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024; std::unique_ptr<OGLStreamBuffer> stream_buffer; - GLint vs_input_index_min; - GLint vs_input_index_max; GLsizeiptr vs_input_size; void AnalyzeVertexArray(bool is_indexed); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 884637ca5..2ffbd3bab 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -21,9 +21,13 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/vector_math.h" +#include "core/core.h" #include "core/frontend/emu_window.h" +#include "core/hle/kernel/process.h" +#include "core/hle/kernel/vm_manager.h" #include "core/memory.h" #include "core/settings.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/utils.h" @@ -107,67 +111,28 @@ static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { } template <bool morton_to_gl, PixelFormat format> -static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr start, PAddr end) { +static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; - constexpr u32 tile_size = bytes_per_pixel * 64; - constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); - static_assert(gl_bytes_per_pixel >= bytes_per_pixel, ""); - gl_buffer += gl_bytes_per_pixel - bytes_per_pixel; - - const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); - const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); - const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size); - - ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end)); - - const u64 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel; - u32 x = static_cast<u32>((begin_pixel_index % (stride * 8)) / 8); - u32 y = static_cast<u32>((begin_pixel_index / (stride * 8)) * 8); - - gl_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel; - - auto glbuf_next_tile = [&] { - x = (x + 8) % stride; - gl_buffer += 8 * gl_bytes_per_pixel; - if (!x) { - y += 8; - gl_buffer -= stride * 9 * gl_bytes_per_pixel; - } - }; - u8* tile_buffer = Memory::GetPhysicalPointer(start); - - if (start < aligned_start && !morton_to_gl) { - std::array<u8, tile_size> tmp_buf; - MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer); - std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start], - std::min(aligned_start, end) - start); - - tile_buffer += aligned_start - start; - glbuf_next_tile(); - } - - const u8* const buffer_end = tile_buffer + aligned_end - aligned_start; - while (tile_buffer < buffer_end) { - MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gl_buffer); - tile_buffer += tile_size; - glbuf_next_tile(); - } - - if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) { - std::array<u8, tile_size> tmp_buf; - MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer); - std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end); - } + // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the + // configuration for this and perform more generic un/swizzle + LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); + VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, + Memory::GetPointer(base), gl_buffer, morton_to_gl); } -static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> morton_to_gl_fns = { - MortonCopy<true, PixelFormat::RGBA8>, // 0 - MortonCopy<true, PixelFormat::RGB8>, // 1 - MortonCopy<true, PixelFormat::RGB5A1>, // 2 - MortonCopy<true, PixelFormat::RGB565>, // 3 - MortonCopy<true, PixelFormat::RGBA4>, // 4 +static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 18> morton_to_gl_fns = { + MortonCopy<true, PixelFormat::RGBA8>, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, nullptr, nullptr, nullptr, @@ -176,19 +141,19 @@ static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> mo nullptr, nullptr, nullptr, - nullptr, // 5 - 13 - MortonCopy<true, PixelFormat::D16>, // 14 - nullptr, // 15 - MortonCopy<true, PixelFormat::D24>, // 16 - MortonCopy<true, PixelFormat::D24S8> // 17 }; -static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gl_to_morton_fns = { - MortonCopy<false, PixelFormat::RGBA8>, // 0 - MortonCopy<false, PixelFormat::RGB8>, // 1 - MortonCopy<false, PixelFormat::RGB5A1>, // 2 - MortonCopy<false, PixelFormat::RGB565>, // 3 - MortonCopy<false, PixelFormat::RGBA4>, // 4 +static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 18> gl_to_morton_fns = { + MortonCopy<false, PixelFormat::RGBA8>, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, nullptr, nullptr, nullptr, @@ -197,11 +162,6 @@ static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gl nullptr, nullptr, nullptr, - nullptr, // 5 - 13 - MortonCopy<false, PixelFormat::D16>, // 14 - nullptr, // 15 - MortonCopy<false, PixelFormat::D24>, // 16 - MortonCopy<false, PixelFormat::D24S8> // 17 }; // Allocate an uninitialized texture of appropriate size and format for the surface @@ -290,17 +250,17 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec static bool FillSurface(const Surface& surface, const u8* fill_data, const MathUtil::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) { - UNIMPLEMENTED(); - return true; + UNREACHABLE(); + return {}; } SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { SurfaceParams params = *this; const u32 tiled_size = is_tiled ? 8 : 1; const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); - PAddr aligned_start = + VAddr aligned_start = addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); - PAddr aligned_end = + VAddr aligned_end = addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); if (aligned_end - aligned_start > stride_tiled_bytes) { @@ -527,10 +487,10 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac } MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); -void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { +void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { ASSERT(type != SurfaceType::Fill); - const u8* const texture_src_data = Memory::GetPhysicalPointer(addr); + u8* const texture_src_data = Memory::GetPointer(addr); if (texture_src_data == nullptr) return; @@ -539,35 +499,30 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { gl_buffer.reset(new u8[gl_buffer_size]); } - // TODO: Should probably be done in ::Memory:: and check for other regions too - if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END) - load_end = Memory::VRAM_VADDR_END; - - if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR) - load_start = Memory::VRAM_VADDR; - MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); ASSERT(load_start >= addr && load_end <= end); - const u32 start_offset = load_start - addr; + const u64 start_offset = load_start - addr; if (!is_tiled) { ASSERT(type == SurfaceType::Color); - std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, - load_end - load_start); + const u32 bytes_per_pixel{GetFormatBpp() >> 3}; + + // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check + // the configuration for this and perform more generic un/swizzle + LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); + VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4, + texture_src_data + start_offset, &gl_buffer[start_offset], + true); } else { - if (type == SurfaceType::Texture) { - UNIMPLEMENTED(); - } else { - morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, - load_start, load_end); - } + morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, + load_start, load_end); } } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { - u8* const dst_buffer = Memory::GetPhysicalPointer(addr); +void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) { + u8* const dst_buffer = Memory::GetPointer(addr); if (dst_buffer == nullptr) return; @@ -1102,18 +1057,106 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& } Surface RasterizerCacheOpenGL::GetTextureSurface(const void* config) { - UNIMPLEMENTED(); + UNREACHABLE(); return {}; } SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( - bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport_rect) { - UNIMPLEMENTED(); - return {}; + bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; + const auto& config = regs.rt[0]; + + // TODO(bunnei): This is hard corded to use just the first render buffer + LOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); + + // update resolution_scale_factor and reset cache if changed + // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We + // need to fix this before making the renderer multi-threaded. + static u16 resolution_scale_factor = GetResolutionScaleFactor(); + if (resolution_scale_factor != GetResolutionScaleFactor()) { + resolution_scale_factor = GetResolutionScaleFactor(); + FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); + } + + MathUtil::Rectangle<u32> viewport_clamped{ + static_cast<u32>(MathUtil::Clamp(viewport.left, 0, static_cast<s32>(config.width))), + static_cast<u32>(MathUtil::Clamp(viewport.top, 0, static_cast<s32>(config.height))), + static_cast<u32>(MathUtil::Clamp(viewport.right, 0, static_cast<s32>(config.width))), + static_cast<u32>(MathUtil::Clamp(viewport.bottom, 0, static_cast<s32>(config.height)))}; + + // get color and depth surfaces + SurfaceParams color_params; + color_params.is_tiled = true; + color_params.res_scale = resolution_scale_factor; + color_params.width = config.width; + color_params.height = config.height; + SurfaceParams depth_params = color_params; + + color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address()); + color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); + color_params.UpdateParams(); + + ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); + // depth_params.addr = config.GetDepthBufferPhysicalAddress(); + // depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); + // depth_params.UpdateParams(); + + auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); + auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); + + // Make sure that framebuffers don't overlap if both color and depth are being used + if (using_color_fb && using_depth_fb && + boost::icl::length(color_vp_interval & depth_vp_interval)) { + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " + "overlapping framebuffers not supported!"); + using_depth_fb = false; + } + + MathUtil::Rectangle<u32> color_rect{}; + Surface color_surface = nullptr; + if (using_color_fb) + std::tie(color_surface, color_rect) = + GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + + MathUtil::Rectangle<u32> depth_rect{}; + Surface depth_surface = nullptr; + if (using_depth_fb) + std::tie(depth_surface, depth_rect) = + GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + + MathUtil::Rectangle<u32> fb_rect{}; + if (color_surface != nullptr && depth_surface != nullptr) { + fb_rect = color_rect; + // Color and Depth surfaces must have the same dimensions and offsets + if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || + color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { + color_surface = GetSurface(color_params, ScaleMatch::Exact, false); + depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); + fb_rect = color_surface->GetScaledRect(); + } + } else if (color_surface != nullptr) { + fb_rect = color_rect; + } else if (depth_surface != nullptr) { + fb_rect = depth_rect; + } + + if (color_surface != nullptr) { + ValidateSurface(color_surface, boost::icl::first(color_vp_interval), + boost::icl::length(color_vp_interval)); + } + if (depth_surface != nullptr) { + ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), + boost::icl::length(depth_vp_interval)); + } + + return std::make_tuple(color_surface, depth_surface, fb_rect); } Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) { - UNIMPLEMENTED(); + UNREACHABLE(); return {}; } @@ -1167,7 +1210,7 @@ void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, } } -void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u64 size) { +void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, VAddr addr, u64 size) { if (size == 0) return; @@ -1227,7 +1270,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, } } -void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u64 size, Surface flush_surface) { +void RasterizerCacheOpenGL::FlushRegion(VAddr addr, u64 size, Surface flush_surface) { if (size == 0) return; @@ -1260,10 +1303,10 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u64 size, Surface flush_surf } void RasterizerCacheOpenGL::FlushAll() { - FlushRegion(0, 0xFFFFFFFF); + FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); } -void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u64 size, const Surface& region_owner) { +void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner) { if (size == 0) return; @@ -1356,6 +1399,34 @@ void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); } -void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u64 size, int delta) { - UNIMPLEMENTED(); +void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { + const u64 num_pages = + ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1; + const u64 page_start = addr >> Memory::PAGE_BITS; + const u64 page_end = page_start + num_pages; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to + // subtract after iterating + const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) + cached_pages.add({pages_interval, delta}); + + for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const u64 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); + else if (delta < 0 && count == -delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + else + ASSERT(count >= 0); + } + + if (delta < 0) + cached_pages.add({pages_interval, delta}); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 17ce0fee7..1f660d30c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -22,15 +22,16 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" +#include "video_core/gpu.h" #include "video_core/renderer_opengl/gl_resource_manager.h" struct CachedSurface; using Surface = std::shared_ptr<CachedSurface>; using SurfaceSet = std::set<Surface>; -using SurfaceRegions = boost::icl::interval_set<PAddr>; -using SurfaceMap = boost::icl::interval_map<PAddr, Surface>; -using SurfaceCache = boost::icl::interval_map<PAddr, SurfaceSet>; +using SurfaceRegions = boost::icl::interval_set<VAddr>; +using SurfaceMap = boost::icl::interval_map<VAddr, Surface>; +using SurfaceCache = boost::icl::interval_map<VAddr, SurfaceSet>; using SurfaceInterval = SurfaceCache::interval_type; static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() && @@ -40,7 +41,7 @@ static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>; using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; -using PageMap = boost::icl::interval_map<u32, int>; +using PageMap = boost::icl::interval_map<u64, int>; enum class ScaleMatch { Exact, // only accept same res scale @@ -115,6 +116,24 @@ struct SurfaceParams { return GetFormatBpp(pixel_format); } + static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { + switch (format) { + case Tegra::RenderTargetFormat::RGBA8_UNORM: + return PixelFormat::RGBA8; + default: + UNREACHABLE(); + } + } + + static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { + switch (format) { + case Tegra::FramebufferConfig::PixelFormat::ABGR8: + return PixelFormat::RGBA8; + default: + UNREACHABLE(); + } + } + static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { SurfaceType a_type = GetFormatType(pixel_format_a); SurfaceType b_type = GetFormatType(pixel_format_b); @@ -211,8 +230,8 @@ struct SurfaceParams { MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const; MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const; - PAddr addr = 0; - PAddr end = 0; + VAddr addr = 0; + VAddr end = 0; u64 size = 0; u32 width = 0; @@ -257,9 +276,9 @@ struct CachedSurface : SurfaceParams { std::unique_ptr<u8[]> gl_buffer; size_t gl_buffer_size = 0; - // Read/Write data in 3DS memory to/from gl_buffer - void LoadGLBuffer(PAddr load_start, PAddr load_end); - void FlushGLBuffer(PAddr flush_start, PAddr flush_end); + // Read/Write data in Switch memory to/from gl_buffer + void LoadGLBuffer(VAddr load_start, VAddr load_end); + void FlushGLBuffer(VAddr flush_start, VAddr flush_end); // Upload/Download data in gl_buffer in/to this surface's texture void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, @@ -298,7 +317,7 @@ public: /// Get the color and depth surfaces based on the framebuffer configuration SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, - const MathUtil::Rectangle<s32>& viewport_rect); + const MathUtil::Rectangle<s32>& viewport); /// Get a surface that matches the fill config Surface GetFillSurface(const void* config); @@ -307,10 +326,10 @@ public: SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); /// Write any cached resources overlapping the region back to memory (if dirty) - void FlushRegion(PAddr addr, u64 size, Surface flush_surface = nullptr); + void FlushRegion(VAddr addr, u64 size, Surface flush_surface = nullptr); /// Mark region as being invalidated by region_owner (nullptr if 3DS memory) - void InvalidateRegion(PAddr addr, u64 size, const Surface& region_owner); + void InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner); /// Flush all cached resources tracked by this cache manager void FlushAll(); @@ -319,7 +338,7 @@ private: void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); /// Update surface's texture for given region when necessary - void ValidateSurface(const Surface& surface, PAddr addr, u64 size); + void ValidateSurface(const Surface& surface, VAddr addr, u64 size); /// Create a new surface Surface CreateSurface(const SurfaceParams& params); @@ -331,7 +350,7 @@ private: void UnregisterSurface(const Surface& surface); /// Increase/decrease the number of surface in pages touching the specified region - void UpdatePagesCachedCount(PAddr addr, u64 size, int delta); + void UpdatePagesCachedCount(VAddr addr, u64 size, int delta); SurfaceCache surface_cache; PageMap cached_pages; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0e0ef18cc..564ea8f9e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -26,7 +26,7 @@ public: sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {} std::string Decompile() { - UNIMPLEMENTED(); + UNREACHABLE(); return {}; } diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index f242bce1d..8f3c98800 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -8,12 +8,12 @@ namespace GLShader { std::string GenerateVertexShader(const MaxwellVSConfig& config) { - UNIMPLEMENTED(); + UNREACHABLE(); return {}; } std::string GenerateFragmentShader(const MaxwellFSConfig& config) { - UNIMPLEMENTED(); + UNREACHABLE(); return {}; } diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index a3ba16761..a6c6204d5 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -38,8 +38,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, if (result == GL_TRUE) { LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]); } else { - LOG_ERROR(Render_OpenGL, "Error compiling vertex shader:\n%s", - &vertex_shader_error[0]); + LOG_CRITICAL(Render_OpenGL, "Error compiling vertex shader:\n%s", + &vertex_shader_error[0]); } } } @@ -62,8 +62,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, if (result == GL_TRUE) { LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]); } else { - LOG_ERROR(Render_OpenGL, "Error compiling geometry shader:\n%s", - &geometry_shader_error[0]); + LOG_CRITICAL(Render_OpenGL, "Error compiling geometry shader:\n%s", + &geometry_shader_error[0]); } } } @@ -86,8 +86,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, if (result == GL_TRUE) { LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); } else { - LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s", - &fragment_shader_error[0]); + LOG_CRITICAL(Render_OpenGL, "Error compiling fragment shader:\n%s", + &fragment_shader_error[0]); } } } @@ -128,20 +128,20 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, if (result == GL_TRUE) { LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]); } else { - LOG_ERROR(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]); + LOG_CRITICAL(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]); } } // If the program linking failed at least one of the shaders was probably bad if (result == GL_FALSE) { if (vertex_shader) { - LOG_ERROR(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); + LOG_CRITICAL(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); } if (geometry_shader) { - LOG_ERROR(Render_OpenGL, "Geometry shader:\n%s", geometry_shader); + LOG_CRITICAL(Render_OpenGL, "Geometry shader:\n%s", geometry_shader); } if (fragment_shader) { - LOG_ERROR(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); + LOG_CRITICAL(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); } } ASSERT_MSG(result == GL_TRUE, "Shader not linked"); diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 940575dfa..c1f4efc8c 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -85,7 +85,7 @@ public: struct { GLuint texture_2d; // GL_TEXTURE_BINDING_2D GLuint sampler; // GL_SAMPLER_BINDING - } texture_units[3]; + } texture_units[32]; struct { GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h new file mode 100644 index 000000000..d847317ac --- /dev/null +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -0,0 +1,50 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <glad/glad.h> +#include "common/common_types.h" +#include "common/logging/log.h" +#include "video_core/engines/maxwell_3d.h" + +namespace MaxwellToGL { + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +inline GLenum VertexType(Maxwell::VertexAttribute attrib) { + switch (attrib.type) { + case Maxwell::VertexAttribute::Type::UnsignedNorm: { + + switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return GL_UNSIGNED_BYTE; + } + + LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size=%s", attrib.SizeString().c_str()); + UNREACHABLE(); + return {}; + } + + case Maxwell::VertexAttribute::Type::Float: + return GL_FLOAT; + } + + LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type=%s", attrib.TypeString().c_str()); + UNREACHABLE(); + return {}; +} + +inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { + switch (topology) { + case Maxwell::PrimitiveTopology::TriangleStrip: + return GL_TRIANGLE_STRIP; + } + LOG_CRITICAL(Render_OpenGL, "Unimplemented primitive topology=%d", topology); + UNREACHABLE(); + return {}; +} + +} // namespace MaxwellToGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 65d38ade5..78b50b227 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -20,6 +20,7 @@ #include "core/settings.h" #include "core/tracer/recorder.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/utils.h" #include "video_core/video_core.h" static const char vertex_shader[] = R"( @@ -98,197 +99,88 @@ RendererOpenGL::RendererOpenGL() = default; RendererOpenGL::~RendererOpenGL() = default; /// Swap buffers (render frame) -void RendererOpenGL::SwapBuffers(boost::optional<const FramebufferInfo&> framebuffer_info) { +void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) { + Core::System::GetInstance().perf_stats.EndSystemFrame(); + // Maintain the rasterizer's state as a priority OpenGLState prev_state = OpenGLState::GetCurState(); state.Apply(); - if (framebuffer_info != boost::none) { - // If framebuffer_info is provided, reload it from memory to a texture - if (screen_info.texture.width != (GLsizei)framebuffer_info->width || - screen_info.texture.height != (GLsizei)framebuffer_info->height || - screen_info.texture.pixel_format != framebuffer_info->pixel_format) { + if (framebuffer != boost::none) { + // If framebuffer is provided, reload it from memory to a texture + if (screen_info.texture.width != (GLsizei)framebuffer->width || + screen_info.texture.height != (GLsizei)framebuffer->height || + screen_info.texture.pixel_format != framebuffer->pixel_format) { // Reallocate texture if the framebuffer size has changed. // This is expected to not happen very often and hence should not be a // performance problem. - ConfigureFramebufferTexture(screen_info.texture, *framebuffer_info); + ConfigureFramebufferTexture(screen_info.texture, *framebuffer); } - LoadFBToScreenInfo(*framebuffer_info, screen_info); - } - - DrawScreens(); - Core::System::GetInstance().perf_stats.EndSystemFrame(); + // Load the framebuffer from memory, draw it to the screen, and swap buffers + LoadFBToScreenInfo(*framebuffer, screen_info); + DrawScreen(); + render_window->SwapBuffers(); + } - // Swap buffers render_window->PollEvents(); - render_window->SwapBuffers(); Core::System::GetInstance().frame_limiter.DoFrameLimiting(CoreTiming::GetGlobalTimeUs()); Core::System::GetInstance().perf_stats.BeginSystemFrame(); + // Restore the rasterizer state prev_state.Apply(); RefreshRasterizerSetting(); } -static inline u32 MortonInterleave128(u32 x, u32 y) { - // 128x128 Z-Order coordinate from 2D coordinates - static constexpr u32 xlut[] = { - 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, - 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, - 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, - 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, - 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, - 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, - 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, - 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, - 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, - 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, - 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, - 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, - 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, - 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, - 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, - 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, - 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, - 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, - 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, - 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, - 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, - 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, - 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, - 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, - 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, - 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, - 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, - 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, - 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, - 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, - 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, - 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, - 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, - 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, - 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, - }; - static constexpr u32 ylut[] = { - 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, - 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, - 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, - 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, - 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, - 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, - 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, - 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, - 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, - 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, - 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, - 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, - 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, - 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, - 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, - 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, - 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, - 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, - 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, - 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, - 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, - 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, - 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, - 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, - 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, - 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, - 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, - 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, - 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, - 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, - 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, - 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, - 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, - 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, - 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, - }; - return xlut[x % 128] + ylut[y % 128]; -} - -static inline u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) { - // Calculates the offset of the position of the pixel in Morton order - // Framebuffer images are split into 128x128 tiles. - - const unsigned int block_height = 128; - const unsigned int coarse_x = x & ~127; - - u32 i = MortonInterleave128(x, y); - - const unsigned int offset = coarse_x * block_height; - - return (i + offset) * bytes_per_pixel; -} - -static void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 gl_bytes_per_pixel, - u8* morton_data, u8* gl_data, bool morton_to_gl) { - u8* data_ptrs[2]; - for (unsigned y = 0; y < height; ++y) { - for (unsigned x = 0; x < width; ++x) { - const u32 coarse_y = y & ~127; - u32 morton_offset = - GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; - u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; - - data_ptrs[morton_to_gl] = morton_data + morton_offset; - data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; - - memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); - } - } -} - /** * Loads framebuffer from emulated memory into the active OpenGL texture. */ -void RendererOpenGL::LoadFBToScreenInfo(const FramebufferInfo& framebuffer_info, +void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer, ScreenInfo& screen_info) { - const u32 bpp{FramebufferInfo::BytesPerPixel(framebuffer_info.pixel_format)}; - const u32 size_in_bytes{framebuffer_info.stride * framebuffer_info.height * bpp}; + const u32 bytes_per_pixel{Tegra::FramebufferConfig::BytesPerPixel(framebuffer.pixel_format)}; + const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; + const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; - MortonCopyPixels128(framebuffer_info.width, framebuffer_info.height, bpp, 4, - Memory::GetPointer(framebuffer_info.address), gl_framebuffer_data.data(), - true); - - LOG_TRACE(Render_OpenGL, "0x%08x bytes from 0x%llx(%dx%d), fmt %x", size_in_bytes, - framebuffer_info.address, framebuffer_info.width, framebuffer_info.height, - (int)framebuffer_info.pixel_format); + // Framebuffer orientation handling + framebuffer_transform_flags = framebuffer.transform_flags; // Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default // only allows rows to have a memory alignement of 4. - ASSERT(framebuffer_info.stride % 4 == 0); + ASSERT(framebuffer.stride % 4 == 0); - framebuffer_flip_vertical = framebuffer_info.flip_vertical; + if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride, + screen_info)) { + // Reset the screen info's display texture to its own permanent texture + screen_info.display_texture = screen_info.texture.resource.handle; + screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f); - // Reset the screen info's display texture to its own permanent texture - screen_info.display_texture = screen_info.texture.resource.handle; - screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f); + Rasterizer()->FlushRegion(framebuffer_addr, size_in_bytes); - // Memory::RasterizerFlushRegion(framebuffer_info.address, size_in_bytes); + VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4, + Memory::GetPointer(framebuffer_addr), + gl_framebuffer_data.data(), true); - state.texture_units[0].texture_2d = screen_info.texture.resource.handle; - state.Apply(); + state.texture_units[0].texture_2d = screen_info.texture.resource.handle; + state.Apply(); - glActiveTexture(GL_TEXTURE0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)framebuffer_info.stride); + glActiveTexture(GL_TEXTURE0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); - // Update existing texture - // TODO: Test what happens on hardware when you change the framebuffer dimensions so that - // they differ from the LCD resolution. - // TODO: Applications could theoretically crash Citra here by specifying too large - // framebuffer sizes. We should make sure that this cannot happen. - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer_info.width, framebuffer_info.height, - screen_info.texture.gl_format, screen_info.texture.gl_type, - gl_framebuffer_data.data()); + // Update existing texture + // TODO: Test what happens on hardware when you change the framebuffer dimensions so that + // they differ from the LCD resolution. + // TODO: Applications could theoretically crash yuzu here by specifying too large + // framebuffer sizes. We should make sure that this cannot happen. + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height, + screen_info.texture.gl_format, screen_info.texture.gl_type, + gl_framebuffer_data.data()); - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - state.texture_units[0].texture_2d = 0; - state.Apply(); + state.texture_units[0].texture_2d = 0; + state.Apply(); + } } /** @@ -372,14 +264,14 @@ void RendererOpenGL::InitOpenGLObjects() { } void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, - const FramebufferInfo& framebuffer_info) { + const Tegra::FramebufferConfig& framebuffer) { - texture.width = framebuffer_info.width; - texture.height = framebuffer_info.height; + texture.width = framebuffer.width; + texture.height = framebuffer.height; GLint internal_format; - switch (framebuffer_info.pixel_format) { - case FramebufferInfo::PixelFormat::ABGR8: + switch (framebuffer.pixel_format) { + case Tegra::FramebufferConfig::PixelFormat::ABGR8: // Use RGBA8 and swap in the fragment shader internal_format = GL_RGBA; texture.gl_format = GL_RGBA; @@ -387,7 +279,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, gl_framebuffer_data.resize(texture.width * texture.height * 4); break; default: - UNIMPLEMENTED(); + UNREACHABLE(); } state.texture_units[0].texture_2d = texture.resource.handle; @@ -401,11 +293,22 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, state.Apply(); } -void RendererOpenGL::DrawSingleScreen(const ScreenInfo& screen_info, float x, float y, float w, - float h) { +void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, + float h) { const auto& texcoords = screen_info.display_texcoords; - const auto& left = framebuffer_flip_vertical ? texcoords.right : texcoords.left; - const auto& right = framebuffer_flip_vertical ? texcoords.left : texcoords.right; + auto left = texcoords.left; + auto right = texcoords.right; + if (framebuffer_transform_flags != Tegra::FramebufferConfig::TransformFlags::Unset) + if (framebuffer_transform_flags == Tegra::FramebufferConfig::TransformFlags::FlipV) { + // Flip the framebuffer vertically + left = texcoords.right; + right = texcoords.left; + } else { + // Other transformations are unsupported + LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags=%d", + framebuffer_transform_flags); + UNIMPLEMENTED(); + } std::array<ScreenRectVertex, 4> vertices = {{ ScreenRectVertex(x, y, texcoords.top, right), @@ -427,7 +330,7 @@ void RendererOpenGL::DrawSingleScreen(const ScreenInfo& screen_info, float x, fl /** * Draws the emulated screens to the emulator window. */ -void RendererOpenGL::DrawScreens() { +void RendererOpenGL::DrawScreen() { const auto& layout = render_window->GetFramebufferLayout(); const auto& screen = layout.screen; @@ -443,8 +346,8 @@ void RendererOpenGL::DrawScreens() { glActiveTexture(GL_TEXTURE0); glUniform1i(uniform_color_texture, 0); - DrawSingleScreen(screen_info, (float)screen.left, (float)screen.top, (float)screen.GetWidth(), - (float)screen.GetHeight()); + DrawScreenTriangles(screen_info, (float)screen.left, (float)screen.top, + (float)screen.GetWidth(), (float)screen.GetHeight()); m_current_frame++; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 05bb3c5cf..fffd0f9f4 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -21,7 +21,7 @@ struct TextureInfo { GLsizei height; GLenum gl_format; GLenum gl_type; - RendererBase::FramebufferInfo::PixelFormat pixel_format; + Tegra::FramebufferConfig::PixelFormat pixel_format; }; /// Structure used for storing information about the display target for each 3DS screen @@ -37,7 +37,7 @@ public: ~RendererOpenGL() override; /// Swap buffers (render frame) - void SwapBuffers(boost::optional<const FramebufferInfo&> framebuffer_info) override; + void SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) override; /** * Set the emulator window to use for renderer @@ -53,13 +53,14 @@ public: private: void InitOpenGLObjects(); - void ConfigureFramebufferTexture(TextureInfo& texture, const FramebufferInfo& framebuffer_info); - void DrawScreens(); - void DrawSingleScreen(const ScreenInfo& screen_info, float x, float y, float w, float h); + void ConfigureFramebufferTexture(TextureInfo& texture, + const Tegra::FramebufferConfig& framebuffer); + void DrawScreen(); + void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h); void UpdateFramerate(); // Loads framebuffer from emulated memory into the display information structure - void LoadFBToScreenInfo(const FramebufferInfo& framebuffer_info, ScreenInfo& screen_info); + void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer, ScreenInfo& screen_info); // Fills active OpenGL texture with the given RGBA color. void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, const TextureInfo& texture); @@ -87,6 +88,6 @@ private: GLuint attrib_position; GLuint attrib_tex_coord; - /// Flips the framebuffer vertically when true - bool framebuffer_flip_vertical; + /// Used for transforming the framebuffer orientation + Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; }; diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp new file mode 100644 index 000000000..2e87281eb --- /dev/null +++ b/src/video_core/textures/decoders.cpp @@ -0,0 +1,105 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstring> +#include "common/assert.h" +#include "video_core/textures/decoders.h" +#include "video_core/textures/texture.h" + +namespace Tegra { +namespace Texture { + +/** + * Calculates the offset of an (x, y) position within a swizzled texture. + * Taken from the Tegra X1 TRM. + */ +static u32 GetSwizzleOffset(u32 x, u32 y, u32 image_width, u32 bytes_per_pixel, u32 block_height) { + u32 image_width_in_gobs = image_width * bytes_per_pixel / 64; + u32 GOB_address = 0 + (y / (8 * block_height)) * 512 * block_height * image_width_in_gobs + + (x * bytes_per_pixel / 64) * 512 * block_height + + (y % (8 * block_height) / 8) * 512; + x *= bytes_per_pixel; + u32 address = GOB_address + ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + + (y % 2) * 16 + (x % 16); + + return address; +} + +static void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, + u8* swizzled_data, u8* unswizzled_data, bool unswizzle, + u32 block_height) { + u8* data_ptrs[2]; + for (unsigned y = 0; y < height; ++y) { + for (unsigned x = 0; x < width; ++x) { + u32 swizzle_offset = GetSwizzleOffset(x, y, width, bytes_per_pixel, block_height); + u32 pixel_index = (x + y * width) * out_bytes_per_pixel; + + data_ptrs[unswizzle] = swizzled_data + swizzle_offset; + data_ptrs[!unswizzle] = &unswizzled_data[pixel_index]; + + std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); + } + } +} + +u32 BytesPerPixel(TextureFormat format) { + switch (format) { + case TextureFormat::DXT1: + // In this case a 'pixel' actually refers to a 4x4 tile. + return 8; + case TextureFormat::A8R8G8B8: + return 4; + default: + UNIMPLEMENTED_MSG("Format not implemented"); + break; + } +} + +std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height) { + u8* data = Memory::GetPointer(address); + u32 bytes_per_pixel = BytesPerPixel(format); + + static constexpr u32 DefaultBlockHeight = 16; + + std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); + + switch (format) { + case TextureFormat::DXT1: + // In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values. + CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data, + unswizzled_data.data(), true, DefaultBlockHeight); + break; + case TextureFormat::A8R8G8B8: + CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, + unswizzled_data.data(), true, DefaultBlockHeight); + break; + default: + UNIMPLEMENTED_MSG("Format not implemented"); + break; + } + + return unswizzled_data; +} + +std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, + u32 height) { + std::vector<u8> rgba_data; + + // TODO(Subv): Implement. + switch (format) { + case TextureFormat::DXT1: + case TextureFormat::A8R8G8B8: + // TODO(Subv): For the time being just forward the same data without any decoding. + rgba_data = texture_data; + break; + default: + UNIMPLEMENTED_MSG("Format not implemented"); + break; + } + + return rgba_data; +} + +} // namespace Texture +} // namespace Tegra diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h new file mode 100644 index 000000000..0c21694ff --- /dev/null +++ b/src/video_core/textures/decoders.h @@ -0,0 +1,26 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> +#include "common/common_types.h" +#include "video_core/textures/texture.h" + +namespace Tegra { +namespace Texture { + +/** + * Unswizzles a swizzled texture without changing its format. + */ +std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height); + +/** + * Decodes an unswizzled texture into a A8R8G8B8 texture. + */ +std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, + u32 height); + +} // namespace Texture +} // namespace Tegra diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h new file mode 100644 index 000000000..07936f8a3 --- /dev/null +++ b/src/video_core/textures/texture.h @@ -0,0 +1,137 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "video_core/memory_manager.h" + +namespace Tegra { +namespace Texture { + +enum class TextureFormat : u32 { + A8R8G8B8 = 8, + DXT1 = 0x24, +}; + +enum class TextureType : u32 { + Texture1D = 0, + Texture2D = 1, + Texture3D = 2, + TextureCubemap = 3, + Texture1DArray = 4, + Texture2DArray = 5, + Texture1DBuffer = 6, + Texture2DNoMipmap = 7, + TextureCubeArray = 8, +}; + +enum class TICHeaderVersion : u32 { + OneDBuffer = 0, + PitchColorKey = 1, + Pitch = 2, + BlockLinear = 3, + BlockLinearColorKey = 4, +}; + +union TextureHandle { + u32 raw; + BitField<0, 20, u32> tic_id; + BitField<20, 12, u32> tsc_id; +}; +static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); + +struct TICEntry { + union { + u32 raw; + BitField<0, 7, TextureFormat> format; + BitField<7, 3, u32> r_type; + BitField<10, 3, u32> g_type; + BitField<13, 3, u32> b_type; + BitField<16, 3, u32> a_type; + }; + u32 address_low; + union { + BitField<0, 16, u32> address_high; + BitField<21, 3, TICHeaderVersion> header_version; + }; + INSERT_PADDING_BYTES(4); + union { + BitField<0, 16, u32> width_minus_1; + BitField<23, 4, TextureType> texture_type; + }; + u16 height_minus_1; + INSERT_PADDING_BYTES(10); + + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); + } + + u32 Width() const { + return width_minus_1 + 1; + } + + u32 Height() const { + return height_minus_1 + 1; + } +}; +static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); + +enum class WrapMode : u32 { + Wrap = 0, + Mirror = 1, + ClampToEdge = 2, + Border = 3, + ClampOGL = 4, + MirrorOnceClampToEdge = 5, + MirrorOnceBorder = 6, + MirrorOnceClampOGL = 7, +}; + +enum class TextureFilter : u32 { + Nearest = 1, + Linear = 2, +}; + +enum class TextureMipmapFilter : u32 { + None = 1, + Nearest = 2, + Linear = 3, +}; + +struct TSCEntry { + union { + BitField<0, 3, WrapMode> wrap_u; + BitField<3, 3, WrapMode> wrap_v; + BitField<6, 3, WrapMode> wrap_p; + BitField<9, 1, u32> depth_compare_enabled; + BitField<10, 3, u32> depth_compare_func; + }; + union { + BitField<0, 2, TextureFilter> mag_filter; + BitField<4, 2, TextureFilter> min_filter; + BitField<6, 2, TextureMipmapFilter> mip_filter; + }; + INSERT_PADDING_BYTES(8); + u32 border_color_r; + u32 border_color_g; + u32 border_color_b; + u32 border_color_a; +}; +static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); + +struct FullTextureInfo { + u32 index; + TICEntry tic; + TSCEntry tsc; + bool enabled; +}; + +/// Returns the number of bytes per pixel of the input texture format. +u32 BytesPerPixel(TextureFormat format); + +} // namespace Texture +} // namespace Tegra diff --git a/src/video_core/utils.h b/src/video_core/utils.h index d94a10417..be0f7e22b 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h @@ -49,4 +49,116 @@ static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) { return (i + offset) * bytes_per_pixel; } +static inline u32 MortonInterleave128(u32 x, u32 y) { + // 128x128 Z-Order coordinate from 2D coordinates + static constexpr u32 xlut[] = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, + 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, + 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, + 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, + 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, + 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, + 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, + 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, + 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, + 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, + 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, + 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, + 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, + 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, + 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, + 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, + 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, + 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, + 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, + 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, + 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, + 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, + 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, + 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, + 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, + 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, + 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, + 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, + 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, + 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, + 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, + 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, + 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, + 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, + }; + static constexpr u32 ylut[] = { + 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, + 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, + 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, + 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, + 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, + 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, + 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, + 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, + 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, + 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, + 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, + 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, + 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, + 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, + 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, + 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, + 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, + 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, + 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, + 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, + 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, + 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, + 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, + 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, + 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, + 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, + 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, + 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, + 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, + 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, + 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, + 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, + 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, + 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, + 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, + }; + return xlut[x % 128] + ylut[y % 128]; +} + +static inline u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) { + // Calculates the offset of the position of the pixel in Morton order + // Framebuffer images are split into 128x128 tiles. + + const unsigned int block_height = 128; + const unsigned int coarse_x = x & ~127; + + u32 i = MortonInterleave128(x, y); + + const unsigned int offset = coarse_x * block_height; + + return (i + offset) * bytes_per_pixel; +} + +static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, + u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data, + bool morton_to_gl) { + u8* data_ptrs[2]; + for (unsigned y = 0; y < height; ++y) { + for (unsigned x = 0; x < width; ++x) { + const u32 coarse_y = y & ~127; + u32 morton_offset = + GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; + u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; + + data_ptrs[morton_to_gl] = morton_data + morton_offset; + data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; + + memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); + } + } +} + } // namespace VideoCore diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 864691baa..289140f31 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -26,7 +26,7 @@ bool Init(EmuWindow* emu_window) { if (g_renderer->Init()) { LOG_DEBUG(Render, "initialized OK"); } else { - LOG_ERROR(Render, "initialization failed !"); + LOG_CRITICAL(Render, "initialization failed !"); return false; } return true; diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index 1fd90b9d0..37da62436 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -15,6 +15,8 @@ class RendererBase; namespace VideoCore { +enum class Renderer { Software, OpenGL }; + extern std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin extern EmuWindow* g_emu_window; ///< Emu window |