summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt8
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp64
-rw-r--r--src/video_core/debug_utils/debug_utils.h163
-rw-r--r--src/video_core/engines/maxwell_3d.cpp223
-rw-r--r--src/video_core/engines/maxwell_3d.h287
-rw-r--r--src/video_core/gpu.cpp4
-rw-r--r--src/video_core/gpu.h46
-rw-r--r--src/video_core/macro_interpreter.cpp257
-rw-r--r--src/video_core/macro_interpreter.h164
-rw-r--r--src/video_core/rasterizer_interface.h14
-rw-r--r--src/video_core/renderer_base.cpp7
-rw-r--r--src/video_core/renderer_base.h40
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp395
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h35
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp285
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h47
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp20
-rw-r--r--src/video_core/renderer_opengl/gl_state.h2
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h50
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp241
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h17
-rw-r--r--src/video_core/textures/decoders.cpp105
-rw-r--r--src/video_core/textures/decoders.h26
-rw-r--r--src/video_core/textures/texture.h137
-rw-r--r--src/video_core/utils.h112
-rw-r--r--src/video_core/video_core.cpp2
-rw-r--r--src/video_core/video_core.h2
29 files changed, 2218 insertions, 541 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e56253c4c..a710c4bc5 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,6 +1,8 @@
add_library(video_core STATIC
command_processor.cpp
command_processor.h
+ debug_utils/debug_utils.cpp
+ debug_utils/debug_utils.h
engines/fermi_2d.cpp
engines/fermi_2d.h
engines/maxwell_3d.cpp
@@ -9,6 +11,8 @@ add_library(video_core STATIC
engines/maxwell_compute.h
gpu.cpp
gpu.h
+ macro_interpreter.cpp
+ macro_interpreter.h
memory_manager.cpp
memory_manager.h
rasterizer_interface.h
@@ -29,8 +33,12 @@ add_library(video_core STATIC
renderer_opengl/gl_state.h
renderer_opengl/gl_stream_buffer.cpp
renderer_opengl/gl_stream_buffer.h
+ renderer_opengl/maxwell_to_gl.h
renderer_opengl/renderer_opengl.cpp
renderer_opengl/renderer_opengl.h
+ textures/decoders.cpp
+ textures/decoders.h
+ textures/texture.h
utils.h
video_core.cpp
video_core.h
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
new file mode 100644
index 000000000..22d44aab2
--- /dev/null
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -0,0 +1,64 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <condition_variable>
+#include <cstdint>
+#include <cstring>
+#include <fstream>
+#include <map>
+#include <mutex>
+#include <string>
+
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/color.h"
+#include "common/common_types.h"
+#include "common/file_util.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
+#include "common/vector_math.h"
+#include "video_core/debug_utils/debug_utils.h"
+
+namespace Tegra {
+
+void DebugContext::DoOnEvent(Event event, void* data) {
+ {
+ std::unique_lock<std::mutex> lock(breakpoint_mutex);
+ // Slow path of OnEvent(): record the breakpoint, notify observers, then block this thread.
+ // TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will
+ // show on debug widgets
+
+ // TODO: Should stop the CPU thread here once we multithread emulation.
+
+ active_breakpoint = event;
+ at_breakpoint = true;
+
+ // Notify all observers; breakpoint_mutex is held here, so they must not call Resume() inline
+ for (auto& breakpoint_observer : breakpoint_observers) {
+ breakpoint_observer->OnMaxwellBreakPointHit(event, data);
+ }
+
+ // Block until Resume() clears at_breakpoint (the predicate also absorbs spurious wakeups)
+ resume_from_breakpoint.wait(lock, [&] { return !at_breakpoint; });
+ }
+}
+
+void DebugContext::Resume() {
+ {
+ std::lock_guard<std::mutex> lock(breakpoint_mutex);
+ // Resume() takes breakpoint_mutex itself, so callers must not already hold it.
+ // Tell all observers that we are about to resume
+ for (auto& breakpoint_observer : breakpoint_observers) {
+ breakpoint_observer->OnMaxwellResume();
+ }
+
+ // Clear the flag that the thread blocked in DoOnEvent() is waiting on
+ at_breakpoint = false;
+ }
+ // Wake the waiter only after the lock scope above has ended.
+ resume_from_breakpoint.notify_one();
+}
+
+} // namespace Tegra
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
new file mode 100644
index 000000000..bbba8e380
--- /dev/null
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -0,0 +1,163 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <condition_variable>
+#include <iterator>
+#include <list>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <utility>
+#include <vector>
+#include "common/common_types.h"
+#include "common/vector_math.h"
+
+namespace Tegra {
+
+class DebugContext {
+public:
+ enum class Event {
+ FirstEvent = 0,
+
+ MaxwellCommandLoaded = FirstEvent,
+ MaxwellCommandProcessed,
+ IncomingPrimitiveBatch,
+ FinishedPrimitiveBatch,
+
+ NumEvents
+ };
+
+ /**
+ * Inherit from this class to be notified of events registered to some debug context.
+ * Most importantly this is used for our debugger GUI.
+ *
+ * To implement event handling, override the OnMaxwellBreakPointHit and OnMaxwellResume methods.
+ * @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state
+ * access
+ * @todo Evaluate an alternative interface, in which there is only one managing observer and
+ * multiple child observers running (by design) on the same thread.
+ */
+ class BreakPointObserver {
+ public:
+ /// Constructs the object such that it observes events of the given DebugContext.
+ BreakPointObserver(std::shared_ptr<DebugContext> debug_context)
+ : context_weak(debug_context) {
+ std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex);
+ debug_context->breakpoint_observers.push_back(this);
+ }
+
+ /// Unregisters this observer from its DebugContext, if that context is still alive.
+ virtual ~BreakPointObserver() {
+ auto context = context_weak.lock();
+ if (context) {
+ // Unregister under the lock, but release it before calling Resume(): Resume() locks
+ // breakpoint_mutex itself, and std::mutex must not be locked twice by one thread.
+ std::unique_lock<std::mutex> lock(context->breakpoint_mutex);
+ context->breakpoint_observers.remove(this);
+ const bool was_last_observer = context->breakpoint_observers.empty();
+ lock.unlock();
+
+ // If we are the last observer to be destroyed, tell the debugger context that
+ // it is free to continue. In particular, this is required for a proper yuzu
+ // shutdown, when the emulation thread is waiting at a breakpoint.
+ if (was_last_observer)
+ context->Resume();
+ }
+ }
+
+ /**
+ * Action to perform when a breakpoint was reached.
+ * @param event Type of event which triggered the breakpoint
+ * @param data Optional data pointer (if unused, this is a nullptr)
+ * @note This function will perform nothing unless it is overridden in the child class.
+ */
+ virtual void OnMaxwellBreakPointHit(Event event, void* data) {}
+
+ /**
+ * Action to perform when emulation is resumed from a breakpoint.
+ * @note This function will perform nothing unless it is overridden in the child class.
+ */
+ virtual void OnMaxwellResume() {}
+
+ protected:
+ /**
+ * Weak context pointer. This need not be valid, so when requesting a shared_ptr via
+ * context_weak.lock(), always compare the result against nullptr.
+ */
+ std::weak_ptr<DebugContext> context_weak;
+ };
+
+ /**
+ * Simple structure defining a breakpoint state
+ */
+ struct BreakPoint {
+ bool enabled = false;
+ };
+
+ /**
+ * Static constructor used to create a shared_ptr of a DebugContext.
+ */
+ static std::shared_ptr<DebugContext> Construct() {
+ return std::shared_ptr<DebugContext>(new DebugContext);
+ }
+
+ /**
+ * Used by the emulation core when a given event has happened. If a breakpoint has been set
+ * for this event, OnEvent calls the event handlers of the registered breakpoint observers.
+ * The current thread then is halted until Resume() is called from another thread (or until
+ * emulation is stopped).
+ * @param event Event which has happened
+ * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until
+ * Resume() is called.
+ */
+ void OnEvent(Event event, void* data) {
+ // Only this cheap enabled-check lives in the header so the compiler can inline it;
+ // the actual breakpoint handling is done out of line by DoOnEvent().
+ const bool is_enabled = breakpoints[static_cast<int>(event)].enabled;
+ if (is_enabled)
+ DoOnEvent(event, data);
+ }
+
+ void DoOnEvent(Event event, void* data);
+
+ /**
+ * Resume from the current breakpoint.
+ * @warning Calling this from the same thread that OnEvent was called in will cause a deadlock.
+ * Calling from any other thread is safe.
+ */
+ void Resume();
+
+ /**
+ * Delete all set breakpoints and resume emulation.
+ */
+ void ClearBreakpoints() {
+ for (auto& bp : breakpoints) {
+ bp.enabled = false;
+ }
+ Resume();
+ }
+
+ // TODO: Evaluate if access to these members should be hidden behind a public interface.
+ std::array<BreakPoint, (int)Event::NumEvents> breakpoints;
+ Event active_breakpoint;
+ bool at_breakpoint = false;
+
+private:
+ /**
+ * Private default constructor to make sure people always construct this through Construct()
+ * instead.
+ */
+ DebugContext() = default;
+
+ /// Mutex protecting current breakpoint state and the observer list.
+ std::mutex breakpoint_mutex;
+
+ /// Used by OnEvent to wait for resumption.
+ std::condition_variable resume_from_breakpoint;
+
+ /// List of registered observers
+ std::list<BreakPointObserver*> breakpoint_observers;
+};
+
+} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 4d9745e48..124753032 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -2,8 +2,16 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <cinttypes>
#include "common/assert.h"
+#include "core/core.h"
+#include "video_core/debug_utils/debug_utils.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
+#include "video_core/textures/decoders.h"
+#include "video_core/textures/texture.h"
+#include "video_core/video_core.h"
namespace Tegra {
namespace Engines {
@@ -11,41 +19,29 @@ namespace Engines {
/// First register id that is actually a Macro call.
constexpr u32 MacroRegistersStart = 0xE00;
-const std::unordered_map<u32, Maxwell3D::MethodInfo> Maxwell3D::method_handlers = {
- {0xE1A, {"BindTextureInfoBuffer", 1, &Maxwell3D::BindTextureInfoBuffer}},
- {0xE24, {"SetShader", 5, &Maxwell3D::SetShader}},
- {0xE2A, {"BindStorageBuffer", 1, &Maxwell3D::BindStorageBuffer}},
-};
-
-Maxwell3D::Maxwell3D(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
+Maxwell3D::Maxwell3D(MemoryManager& memory_manager)
+ : memory_manager(memory_manager), macro_interpreter(*this) {}
void Maxwell3D::SubmitMacroCode(u32 entry, std::vector<u32> code) {
uploaded_macros[entry * 2 + MacroRegistersStart] = std::move(code);
}
-void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) {
- // TODO(Subv): Write an interpreter for the macros uploaded via registers 0x45 and 0x47
-
+void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
+ auto macro_code = uploaded_macros.find(method);
// The requested macro must have been uploaded already.
- ASSERT_MSG(uploaded_macros.find(method) != uploaded_macros.end(), "Macro %08X was not uploaded",
- method);
-
- auto itr = method_handlers.find(method);
- ASSERT_MSG(itr != method_handlers.end(), "Unhandled method call %08X", method);
-
- ASSERT(itr->second.arguments == parameters.size());
+ ASSERT_MSG(macro_code != uploaded_macros.end(), "Macro %08X was not uploaded", method);
- (this->*itr->second.handler)(parameters);
-
- // Reset the current macro and its parameters.
+ // Reset the current macro and execute it.
executing_macro = 0;
- macro_params.clear();
+ macro_interpreter.Execute(macro_code->second, std::move(parameters));
}
void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
+ auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
+
// It is an error to write to a register other than the current macro's ARG register before it
// has finished execution.
if (executing_macro != 0) {
@@ -67,11 +63,15 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
// Call the macro when there are no more parameters in the command buffer
if (remaining_params == 0) {
- CallMacroMethod(executing_macro, macro_params);
+ CallMacroMethod(executing_macro, std::move(macro_params));
}
return;
}
+ if (debug_context) {
+ debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
+ }
+
regs.reg_array[method] = value;
#define MAXWELL3D_REG_INDEX(field_name) (offsetof(Regs, field_name) / sizeof(u32))
@@ -137,6 +137,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
}
#undef MAXWELL3D_REG_INDEX
+
+ if (debug_context) {
+ debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
+ }
}
void Maxwell3D::ProcessQueryGet() {
@@ -159,85 +163,20 @@ void Maxwell3D::ProcessQueryGet() {
}
void Maxwell3D::DrawArrays() {
- LOG_WARNING(HW_GPU, "Game requested a DrawArrays, ignoring");
-}
+ LOG_DEBUG(HW_GPU, "called, topology=%d, count=%d", regs.draw.topology.Value(),
+ regs.vertex_buffer.count);
-void Maxwell3D::BindTextureInfoBuffer(const std::vector<u32>& parameters) {
- /**
- * Parameters description:
- * [0] = Shader stage, usually 4 for FragmentShader
- */
+ auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
- u32 stage = parameters[0];
-
- // Perform the same operations as the real macro code.
- GPUVAddr address = static_cast<GPUVAddr>(regs.tex_info_buffers.address[stage]) << 8;
- u32 size = regs.tex_info_buffers.size[stage];
-
- regs.const_buffer.cb_size = size;
- regs.const_buffer.cb_address_high = address >> 32;
- regs.const_buffer.cb_address_low = address & 0xFFFFFFFF;
-}
-
-void Maxwell3D::SetShader(const std::vector<u32>& parameters) {
- /**
- * Parameters description:
- * [0] = Shader Program.
- * [1] = Unknown, presumably the shader id.
- * [2] = Offset to the start of the shader, after the 0x30 bytes header.
- * [3] = Shader Stage.
- * [4] = Const Buffer Address >> 8.
- */
- auto shader_program = static_cast<Regs::ShaderProgram>(parameters[0]);
- // TODO(Subv): This address is probably an offset from the CODE_ADDRESS register.
- GPUVAddr address = parameters[2];
- auto shader_stage = static_cast<Regs::ShaderStage>(parameters[3]);
- GPUVAddr cb_address = parameters[4] << 8;
-
- auto& shader = state.shader_programs[static_cast<size_t>(shader_program)];
- shader.program = shader_program;
- shader.stage = shader_stage;
- shader.address = address;
-
- // Perform the same operations as the real macro code.
- // TODO(Subv): Early exit if register 0xD1C + shader_program contains the same as params[1].
- auto& shader_regs = regs.shader_config[static_cast<size_t>(shader_program)];
- shader_regs.start_id = address;
- // TODO(Subv): Write params[1] to register 0xD1C + shader_program.
- // TODO(Subv): Write params[2] to register 0xD22 + shader_program.
-
- // Note: This value is hardcoded in the macro's code.
- static constexpr u32 DefaultCBSize = 0x10000;
- regs.const_buffer.cb_size = DefaultCBSize;
- regs.const_buffer.cb_address_high = cb_address >> 32;
- regs.const_buffer.cb_address_low = cb_address & 0xFFFFFFFF;
-
- // Write a hardcoded 0x11 to CB_BIND, this binds the current const buffer to buffer c1[] in the
- // shader. It's likely that these are the constants for the shader.
- regs.cb_bind[static_cast<size_t>(shader_stage)].valid.Assign(1);
- regs.cb_bind[static_cast<size_t>(shader_stage)].index.Assign(1);
-
- ProcessCBBind(shader_stage);
-}
-
-void Maxwell3D::BindStorageBuffer(const std::vector<u32>& parameters) {
- /**
- * Parameters description:
- * [0] = Buffer offset >> 2
- */
-
- u32 buffer_offset = parameters[0] << 2;
-
- // Perform the same operations as the real macro code.
- // Note: This value is hardcoded in the macro's code.
- static constexpr u32 DefaultCBSize = 0x5F00;
- regs.const_buffer.cb_size = DefaultCBSize;
+ if (debug_context) {
+ debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
+ }
- GPUVAddr address = regs.ssbo_info.BufferAddress();
- regs.const_buffer.cb_address_high = address >> 32;
- regs.const_buffer.cb_address_low = address & 0xFFFFFFFF;
+ if (debug_context) {
+ debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
+ }
- regs.const_buffer.cb_pos = buffer_offset;
+ VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(false /*is_indexed*/);
}
void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
@@ -270,5 +209,95 @@ void Maxwell3D::ProcessCBData(u32 value) {
regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
}
+Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
+ // Entries are stored back to back starting at the TIC base address; the resulting GPU
+ // address is converted with PhysicalToVirtualAddress() before it is read.
+ const GPUVAddr entry_gpu_addr = regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry);
+ const VAddr entry_cpu_addr = memory_manager.PhysicalToVirtualAddress(entry_gpu_addr);
+
+ Texture::TICEntry tic_entry;
+ Memory::ReadBlock(entry_cpu_addr, &tic_entry, sizeof(tic_entry));
+
+ // Only the subset of TIC layouts listed below is handled so far.
+ ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear,
+ "TIC versions other than BlockLinear are unimplemented");
+ ASSERT_MSG(tic_entry.texture_type == Texture::TextureType::Texture2D,
+ "Texture types other than Texture2D are unimplemented");
+
+ // TODO(Subv): Different data types for separate components are not supported
+ auto r_type = tic_entry.r_type.Value();
+ auto g_type = tic_entry.g_type.Value();
+ auto b_type = tic_entry.b_type.Value();
+ auto a_type = tic_entry.a_type.Value();
+ ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
+
+ return tic_entry;
+}
+
+Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
+ // Entries are stored back to back starting at the TSC base address; the resulting GPU
+ // address is converted with PhysicalToVirtualAddress() before it is read.
+ const GPUVAddr entry_gpu_addr = regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry);
+ const VAddr entry_cpu_addr = memory_manager.PhysicalToVirtualAddress(entry_gpu_addr);
+
+ Texture::TSCEntry entry;
+ Memory::ReadBlock(entry_cpu_addr, &entry, sizeof(entry));
+ return entry;
+}
+
+std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const {
+ // Collects every texture of the given stage whose handle has a non-zero TIC id.
+ std::vector<Texture::FullTextureInfo> textures;
+
+ // The texture handles are read from the const buffer selected by tex_cb_index.
+ const auto& stage_state = state.shader_stages[static_cast<size_t>(stage)];
+ const auto& tex_info_buffer = stage_state.const_buffers[regs.tex_cb_index];
+ ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
+
+ const GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size;
+
+ // Offset into the texture constbuffer where the texture info begins.
+ static constexpr size_t TextureInfoOffset = 0x20;
+
+ for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
+ current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
+
+ Texture::TextureHandle tex_handle{
+ Memory::Read32(memory_manager.PhysicalToVirtualAddress(current_texture))};
+
+ Texture::FullTextureInfo tex_info{};
+ // TODO(Subv): Use the shader to determine which textures are actually accessed.
+ // The index is the handle's slot number counted from the start of the handle area.
+ tex_info.index = (current_texture - tex_info_buffer.address - TextureInfoOffset) /
+ sizeof(Texture::TextureHandle);
+
+ // Load the TIC data. Only a non-zero TIC id marks the texture as enabled.
+ if (tex_handle.tic_id != 0) {
+ tex_info.enabled = true;
+
+ auto tic_entry = GetTICEntry(tex_handle.tic_id);
+ // TODO(Subv): Workaround for BitField's move constructor being deleted.
+ std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
+ }
+
+ // Load the TSC data
+ if (tex_handle.tsc_id != 0) {
+ auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
+ // TODO(Subv): Workaround for BitField's move constructor being deleted.
+ std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
+ }
+
+ if (tex_info.enabled)
+ textures.push_back(tex_info);
+ }
+
+ return textures;
+}
+
+u32 Maxwell3D::GetRegisterValue(u32 method) const {
+ ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
+ return regs.reg_array[method]; // Raw register word, indexed directly by the method number
+}
+
} // namespace Engines
} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index aab282b77..98b39b2ff 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -7,10 +7,15 @@
#include <array>
#include <unordered_map>
#include <vector>
+#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "common/math_util.h"
+#include "video_core/gpu.h"
+#include "video_core/macro_interpreter.h"
#include "video_core/memory_manager.h"
+#include "video_core/textures/texture.h"
namespace Tegra {
namespace Engines {
@@ -20,18 +25,13 @@ public:
explicit Maxwell3D(MemoryManager& memory_manager);
~Maxwell3D() = default;
- /// Write the value to the register identified by method.
- void WriteReg(u32 method, u32 value, u32 remaining_params);
-
- /// Uploads the code for a GPU macro program associated with the specified entry.
- void SubmitMacroCode(u32 entry, std::vector<u32> code);
-
/// Register structure of the Maxwell3D engine.
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
struct Regs {
static constexpr size_t NUM_REGS = 0xE36;
static constexpr size_t NumRenderTargets = 8;
+ static constexpr size_t NumViewports = 16;
static constexpr size_t NumCBData = 16;
static constexpr size_t NumVertexArrays = 32;
static constexpr size_t NumVertexAttributes = 32;
@@ -62,6 +62,192 @@ public:
Fragment = 4,
};
+ struct VertexAttribute {
+ enum class Size : u32 {
+ Size_32_32_32_32 = 0x01,
+ Size_32_32_32 = 0x02,
+ Size_16_16_16_16 = 0x03,
+ Size_32_32 = 0x04,
+ Size_16_16_16 = 0x05,
+ Size_8_8_8_8 = 0x0a,
+ Size_16_16 = 0x0f,
+ Size_32 = 0x12,
+ Size_8_8_8 = 0x13,
+ Size_8_8 = 0x18,
+ Size_16 = 0x1b,
+ Size_8 = 0x1d,
+ Size_10_10_10_2 = 0x30,
+ Size_11_11_10 = 0x31,
+ };
+
+ enum class Type : u32 {
+ SignedNorm = 1,
+ UnsignedNorm = 2,
+ SignedInt = 3,
+ UnsignedInt = 4,
+ UnsignedScaled = 5,
+ SignedScaled = 6,
+ Float = 7,
+ };
+
+ union {
+ BitField<0, 5, u32> buffer;
+ BitField<6, 1, u32> constant;
+ BitField<7, 14, u32> offset;
+ BitField<21, 6, Size> size;
+ BitField<27, 3, Type> type;
+ BitField<31, 1, u32> bgra;
+ };
+
+ /// Returns the number of components described by this attribute's size field.
+ u32 ComponentCount() const {
+ switch (size) {
+ case Size::Size_32_32_32_32:
+ return 4;
+ case Size::Size_32_32_32:
+ return 3;
+ case Size::Size_16_16_16_16:
+ return 4;
+ case Size::Size_32_32:
+ return 2;
+ case Size::Size_16_16_16:
+ return 3;
+ case Size::Size_8_8_8_8:
+ return 4;
+ case Size::Size_16_16:
+ return 2;
+ case Size::Size_32:
+ return 1;
+ case Size::Size_8_8_8:
+ return 3;
+ case Size::Size_8_8:
+ return 2;
+ case Size::Size_16:
+ return 1;
+ case Size::Size_8:
+ return 1;
+ case Size::Size_10_10_10_2:
+ return 4;
+ case Size::Size_11_11_10:
+ return 3;
+ }
+ // Same shape as SizeString()/TypeString(): an explicit return keeps control from flowing
+ // off the end of a non-void function should UNREACHABLE() ever return.
+ UNREACHABLE();
+ return 0;
+ }
+
+ /// Returns the total size in bytes of one attribute with this size field.
+ u32 SizeInBytes() const {
+ switch (size) {
+ case Size::Size_32_32_32_32:
+ return 16;
+ case Size::Size_32_32_32:
+ return 12;
+ case Size::Size_16_16_16_16:
+ return 8;
+ case Size::Size_32_32:
+ return 8;
+ case Size::Size_16_16_16:
+ return 6;
+ case Size::Size_8_8_8_8:
+ return 4;
+ case Size::Size_16_16:
+ return 4;
+ case Size::Size_32:
+ return 4;
+ case Size::Size_8_8_8:
+ return 3;
+ case Size::Size_8_8:
+ return 2;
+ case Size::Size_16:
+ return 2;
+ case Size::Size_8:
+ return 1;
+ case Size::Size_10_10_10_2:
+ return 4;
+ case Size::Size_11_11_10:
+ return 4;
+ }
+ // Same shape as SizeString()/TypeString(): an explicit return keeps control from flowing
+ // off the end of a non-void function should UNREACHABLE() ever return.
+ UNREACHABLE();
+ return 0;
+ }
+
+ std::string SizeString() const {
+ switch (size) {
+ case Size::Size_32_32_32_32:
+ return "32_32_32_32";
+ case Size::Size_32_32_32:
+ return "32_32_32";
+ case Size::Size_16_16_16_16:
+ return "16_16_16_16";
+ case Size::Size_32_32:
+ return "32_32";
+ case Size::Size_16_16_16:
+ return "16_16_16";
+ case Size::Size_8_8_8_8:
+ return "8_8_8_8";
+ case Size::Size_16_16:
+ return "16_16";
+ case Size::Size_32:
+ return "32";
+ case Size::Size_8_8_8:
+ return "8_8_8";
+ case Size::Size_8_8:
+ return "8_8";
+ case Size::Size_16:
+ return "16";
+ case Size::Size_8:
+ return "8";
+ case Size::Size_10_10_10_2:
+ return "10_10_10_2";
+ case Size::Size_11_11_10:
+ return "11_11_10";
+ }
+ UNREACHABLE();
+ return {};
+ }
+
+ std::string TypeString() const {
+ switch (type) {
+ case Type::SignedNorm:
+ return "SNORM";
+ case Type::UnsignedNorm:
+ return "UNORM";
+ case Type::SignedInt:
+ return "SINT";
+ case Type::UnsignedInt:
+ return "UINT";
+ case Type::UnsignedScaled:
+ return "USCALED";
+ case Type::SignedScaled:
+ return "SSCALED";
+ case Type::Float:
+ return "FLOAT";
+ }
+ UNREACHABLE();
+ return {};
+ }
+
+ bool IsNormalized() const {
+ return (type == Type::SignedNorm) || (type == Type::UnsignedNorm);
+ }
+ };
+
+ enum class PrimitiveTopology : u32 {
+ Points = 0x0,
+ Lines = 0x1,
+ LineLoop = 0x2,
+ LineStrip = 0x3,
+ Triangles = 0x4,
+ TriangleStrip = 0x5,
+ TriangleFan = 0x6,
+ Quads = 0x7,
+ QuadStrip = 0x8,
+ Polygon = 0x9,
+ LinesAdjacency = 0xa,
+ LineStripAdjacency = 0xb,
+ TrianglesAdjacency = 0xc,
+ TriangleStripAdjacency = 0xd,
+ Patches = 0xe,
+ };
+
union {
struct {
INSERT_PADDING_WORDS(0x200);
@@ -69,9 +255,9 @@ public:
struct {
u32 address_high;
u32 address_low;
- u32 horiz;
- u32 vert;
- u32 format;
+ u32 width;
+ u32 height;
+ Tegra::RenderTargetFormat format;
u32 block_dimensions;
u32 array_mode;
u32 layer_stride;
@@ -84,7 +270,31 @@ public:
}
} rt[NumRenderTargets];
- INSERT_PADDING_WORDS(0xDD);
+ INSERT_PADDING_WORDS(0x80);
+
+ struct {
+ union {
+ BitField<0, 16, u32> x;
+ BitField<16, 16, u32> width;
+ };
+ union {
+ BitField<0, 16, u32> y;
+ BitField<16, 16, u32> height;
+ };
+ float depth_range_near;
+ float depth_range_far;
+
+ MathUtil::Rectangle<s32> GetRect() const {
+ return {
+ static_cast<s32>(x), // left
+ static_cast<s32>(y + height), // top
+ static_cast<s32>(x + width), // right
+ static_cast<s32>(y) // bottom
+ };
+ };
+ } viewport[NumViewports];
+
+ INSERT_PADDING_WORDS(0x1D);
struct {
u32 first;
@@ -108,14 +318,7 @@ public:
INSERT_PADDING_WORDS(0x5B);
- union {
- BitField<0, 5, u32> buffer;
- BitField<6, 1, u32> constant;
- BitField<7, 14, u32> offset;
- BitField<21, 6, u32> size;
- BitField<27, 3, u32> type;
- BitField<31, 1, u32> bgra;
- } vertex_attrib_format[NumVertexAttributes];
+ VertexAttribute vertex_attrib_format[NumVertexAttributes];
INSERT_PADDING_WORDS(0xF);
@@ -163,13 +366,15 @@ public:
}
} code_address;
INSERT_PADDING_WORDS(1);
+
struct {
u32 vertex_end_gl;
union {
u32 vertex_begin_gl;
- BitField<0, 16, u32> topology;
+ BitField<0, 16, PrimitiveTopology> topology;
};
} draw;
+
INSERT_PADDING_WORDS(0x139);
struct {
u32 query_address_high;
@@ -294,22 +499,27 @@ public:
bool enabled;
};
- struct ShaderProgramInfo {
- Regs::ShaderStage stage;
- Regs::ShaderProgram program;
- GPUVAddr address;
- };
-
struct ShaderStageInfo {
std::array<ConstBufferInfo, Regs::MaxConstBuffers> const_buffers;
};
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
- std::array<ShaderProgramInfo, Regs::MaxShaderProgram> shader_programs;
};
State state{};
+ /// Reads a register value located at the input method address
+ u32 GetRegisterValue(u32 method) const;
+
+ /// Write the value to the register identified by method.
+ void WriteReg(u32 method, u32 value, u32 remaining_params);
+
+ /// Uploads the code for a GPU macro program associated with the specified entry.
+ void SubmitMacroCode(u32 entry, std::vector<u32> code);
+
+ /// Returns a list of enabled textures for the specified shader stage.
+ std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
+
private:
MemoryManager& memory_manager;
@@ -320,12 +530,21 @@ private:
/// Parameters that have been submitted to the macro call so far.
std::vector<u32> macro_params;
+ /// Interpreter for the macro codes uploaded to the GPU.
+ MacroInterpreter macro_interpreter;
+
+ /// Retrieves information about a specific TIC entry from the TIC buffer.
+ Texture::TICEntry GetTICEntry(u32 tic_index) const;
+
+ /// Retrieves information about a specific TSC entry from the TSC buffer.
+ Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
+
/**
* Call a macro on this engine.
* @param method Method to call
* @param parameters Arguments to the method call
*/
- void CallMacroMethod(u32 method, const std::vector<u32>& parameters);
+ void CallMacroMethod(u32 method, std::vector<u32> parameters);
/// Handles a write to the QUERY_GET register.
void ProcessQueryGet();
@@ -338,19 +557,6 @@ private:
/// Handles a write to the VERTEX_END_GL register, triggering a draw.
void DrawArrays();
-
- /// Method call handlers
- void BindTextureInfoBuffer(const std::vector<u32>& parameters);
- void SetShader(const std::vector<u32>& parameters);
- void BindStorageBuffer(const std::vector<u32>& parameters);
-
- struct MethodInfo {
- const char* name;
- u32 arguments;
- void (Maxwell3D::*handler)(const std::vector<u32>& parameters);
- };
-
- static const std::unordered_map<u32, MethodInfo> method_handlers;
};
#define ASSERT_REG_POSITION(field_name, position) \
@@ -358,6 +564,7 @@ private:
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(rt, 0x200);
+ASSERT_REG_POSITION(viewport, 0x300);
ASSERT_REG_POSITION(vertex_buffer, 0x35D);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index c384d236e..9463cd5d6 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -18,4 +18,8 @@ GPU::GPU() {
GPU::~GPU() = default;
+const Tegra::Engines::Maxwell3D& GPU::Get3DEngine() const {
+ return *maxwell_3d;
+}
+
} // namespace Tegra
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 2a9064ba3..71a8661b4 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -8,10 +8,49 @@
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
+#include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/memory_manager.h"
namespace Tegra {
+enum class RenderTargetFormat : u32 {
+ NONE = 0x0,
+ RGBA8_UNORM = 0xD5,
+};
+
+class DebugContext;
+
+/**
+ * Struct describing framebuffer configuration
+ */
+struct FramebufferConfig {
+ enum class PixelFormat : u32 {
+ ABGR8 = 1,
+ };
+
+ /**
+ * Returns the number of bytes per pixel.
+ * @param format Pixel format to query
+ * @return Size of one pixel in bytes; 0 for a format not handled by the switch
+ */
+ static u32 BytesPerPixel(PixelFormat format) {
+ switch (format) {
+ case PixelFormat::ABGR8:
+ return 4;
+ }
+
+ // An explicit return keeps control from flowing off the end of a non-void function
+ // should UNREACHABLE() ever return.
+ UNREACHABLE();
+ return 0;
+ }
+
+ VAddr address;
+ u32 offset;
+ u32 width;
+ u32 height;
+ u32 stride;
+ PixelFormat pixel_format;
+
+ using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
+ TransformFlags transform_flags;
+};
+
namespace Engines {
class Fermi2D;
class Maxwell3D;
@@ -34,8 +73,15 @@ public:
/// Processes a command list stored at the specified address in GPU memory.
void ProcessCommandList(GPUVAddr address, u32 size);
+ /// Returns a reference to the Maxwell3D GPU engine.
+ const Engines::Maxwell3D& Get3DEngine() const;
+
std::unique_ptr<MemoryManager> memory_manager;
+ Engines::Maxwell3D& Maxwell3D() {
+ return *maxwell_3d;
+ }
+
private:
static constexpr u32 InvalidGraphMacroEntry = 0xFFFFFFFF;
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
new file mode 100644
index 000000000..993a67746
--- /dev/null
+++ b/src/video_core/macro_interpreter.cpp
@@ -0,0 +1,257 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/macro_interpreter.h"
+
+namespace Tegra {
+
+/// Binds the interpreter to the Maxwell3D engine whose registers it reads and writes.
+MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
+
+/**
+ * Runs a macro program until it reaches an exit condition.
+ * @param code The macro code, one instruction per 32-bit word.
+ * @param parameters The macro's input parameters. At least one is required, because the first
+ *                   parameter is preloaded into register 1 before execution starts.
+ */
+void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> parameters) {
+    Reset();
+
+    // Reading parameters[0] from an empty vector would be an out-of-bounds access, so reject the
+    // call up front instead of executing with garbage in $r1.
+    ASSERT_MSG(!parameters.empty(), "Macros must be called with at least one parameter");
+    if (parameters.empty()) {
+        return;
+    }
+
+    registers[1] = parameters[0];
+    this->parameters = std::move(parameters);
+
+    // Execute the code until we hit an exit condition.
+    bool keep_executing = true;
+    while (keep_executing) {
+        keep_executing = Step(code, false);
+    }
+
+    // Assert that the macro used all the input parameters
+    ASSERT(next_parameter_index == this->parameters.size());
+}
+
+void MacroInterpreter::Reset() {
+    // Rewind execution: start at the first instruction with no pending delayed branch.
+    pc = 0;
+    delayed_pc = boost::none;
+
+    // Clear all data state left over from a previous macro.
+    registers.fill(0);
+    method_address.raw = 0;
+    parameters.clear();
+
+    // Fetching starts at the second parameter, since the first one is placed in $r1 before the
+    // macro begins executing.
+    next_parameter_index = 1;
+}
+
+/**
+ * Executes the instruction at the current program counter, plus any delay-slot instruction it
+ * requires.
+ * @param code The macro code being executed.
+ * @param is_delay_slot True when this step runs in the delay slot of a branch or exit.
+ * @return false once an exit instruction has been processed, true to keep executing.
+ */
+bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
+    // Byte address of the instruction being executed; branch offsets are relative to it.
+    u32 base_address = pc;
+
+    Opcode opcode = GetOpcode(code);
+    pc += 4;
+
+    // Update the program counter if we were delayed
+    if (delayed_pc != boost::none) {
+        ASSERT(is_delay_slot);
+        pc = *delayed_pc;
+        delayed_pc = boost::none;
+    }
+
+    switch (opcode.operation) {
+    case Operation::ALU: {
+        u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a),
+                                  GetRegister(opcode.src_b));
+        ProcessResult(opcode.result_operation, opcode.dst, result);
+        break;
+    }
+    case Operation::AddImmediate: {
+        ProcessResult(opcode.result_operation, opcode.dst,
+                      GetRegister(opcode.src_a) + opcode.immediate);
+        break;
+    }
+    case Operation::ExtractInsert: {
+        u32 dst = GetRegister(opcode.src_a);
+        u32 src = GetRegister(opcode.src_b);
+
+        // Replace the destination bitfield with the bitfield extracted from the source.
+        src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask();
+        dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit);
+        dst |= src << opcode.bf_dst_bit;
+        ProcessResult(opcode.result_operation, opcode.dst, dst);
+        break;
+    }
+    case Operation::ExtractShiftLeftImmediate: {
+        u32 dst = GetRegister(opcode.src_a);
+        u32 src = GetRegister(opcode.src_b);
+
+        u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit;
+
+        ProcessResult(opcode.result_operation, opcode.dst, result);
+        break;
+    }
+    case Operation::ExtractShiftLeftRegister: {
+        u32 dst = GetRegister(opcode.src_a);
+        u32 src = GetRegister(opcode.src_b);
+
+        u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst;
+
+        ProcessResult(opcode.result_operation, opcode.dst, result);
+        break;
+    }
+    case Operation::Read: {
+        u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate);
+        ProcessResult(opcode.result_operation, opcode.dst, result);
+        break;
+    }
+    case Operation::Branch: {
+        ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
+        u32 value = GetRegister(opcode.src_a);
+        bool taken = EvaluateBranchCondition(opcode.branch_condition, value);
+        if (taken) {
+            // The immediate is a signed offset in instructions. Scale it by multiplying rather
+            // than shifting: left-shifting a negative value is undefined before C++20. The
+            // unsigned addition wraps, so backward branches still resolve correctly.
+            const u32 branch_target =
+                base_address + static_cast<u32>(opcode.immediate.Value() * 4);
+
+            // Ignore the delay slot if the branch has the annul bit.
+            if (opcode.branch_annul) {
+                pc = branch_target;
+                return true;
+            }
+
+            delayed_pc = branch_target;
+            // Execute one more instruction due to the delay slot.
+            return Step(code, true);
+        }
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented macro operation %u",
+                          static_cast<u32>(opcode.operation.Value()));
+    }
+
+    if (opcode.is_exit) {
+        // Exit has a delay slot, execute the next instruction
+        // Note: Executing an exit during a branch delay slot will cause the instruction at the
+        // branch target to be executed before exiting.
+        Step(code, true);
+        return false;
+    }
+
+    return true;
+}
+
+MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector<u32>& code) const {
+    // The program counter is a byte offset, while the code is stored as 32-bit words.
+    ASSERT((pc % sizeof(u32)) == 0);
+    ASSERT(pc < code.size() * sizeof(u32));
+
+    const size_t word_index = pc / sizeof(u32);
+    return {code[word_index]};
+}
+
+/**
+ * Computes `src_a OP src_b` for the given ALU operation.
+ * @return The result, or 0 for an operation that is not implemented yet.
+ */
+u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const {
+    switch (operation) {
+    case ALUOperation::Add:
+        return src_a + src_b;
+    // TODO(Subv): Implement AddWithCarry
+    case ALUOperation::Subtract:
+        return src_a - src_b;
+    // TODO(Subv): Implement SubtractWithBorrow
+    case ALUOperation::Xor:
+        return src_a ^ src_b;
+    case ALUOperation::Or:
+        return src_a | src_b;
+    case ALUOperation::And:
+        return src_a & src_b;
+    case ALUOperation::AndNot:
+        return src_a & ~src_b;
+    case ALUOperation::Nand:
+        return ~(src_a & src_b);
+
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented ALU operation %u", static_cast<u32>(operation));
+        // Flowing off the end of a value-returning function is undefined behavior, so return a
+        // defined value in case the macro above does not terminate the program.
+        return 0;
+    }
+}
+
+void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 result) {
+    switch (operation) {
+    case ResultOperation::IgnoreAndFetch: {
+        // The computed result is dropped; the register receives the next parameter instead.
+        const u32 fetched = FetchParameter();
+        SetRegister(reg, fetched);
+        break;
+    }
+    case ResultOperation::Move: {
+        // Store the result in the register.
+        SetRegister(reg, result);
+        break;
+    }
+    case ResultOperation::MoveAndSetMethod: {
+        // Store the result and also make it the current method address.
+        SetRegister(reg, result);
+        SetMethodAddress(result);
+        break;
+    }
+    case ResultOperation::FetchAndSend: {
+        // The register receives the next parameter, while the result is sent.
+        const u32 fetched = FetchParameter();
+        SetRegister(reg, fetched);
+        Send(result);
+        break;
+    }
+    case ResultOperation::MoveAndSend: {
+        // Store the result and send it.
+        SetRegister(reg, result);
+        Send(result);
+        break;
+    }
+    case ResultOperation::FetchAndSetMethod: {
+        // The register receives the next parameter; the result becomes the method address.
+        const u32 fetched = FetchParameter();
+        SetRegister(reg, fetched);
+        SetMethodAddress(result);
+        break;
+    }
+    case ResultOperation::MoveAndSetMethodFetchAndSend: {
+        // Store the result, use it as the method address, then send the next parameter.
+        SetRegister(reg, result);
+        SetMethodAddress(result);
+        const u32 fetched = FetchParameter();
+        Send(fetched);
+        break;
+    }
+    case ResultOperation::MoveAndSetMethodSend: {
+        // Store the result, use it as the method address, then send bits 12:17 of it.
+        SetRegister(reg, result);
+        SetMethodAddress(result);
+        const u32 upper_bits = (result >> 12) & 0b111111;
+        Send(upper_bits);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented result operation %u", static_cast<u32>(operation));
+    }
+}
+
+u32 MacroInterpreter::FetchParameter() {
+    // Hand out the next unread parameter and advance the read position past it.
+    ASSERT(next_parameter_index < parameters.size());
+    const u32 value = parameters[next_parameter_index];
+    ++next_parameter_index;
+    return value;
+}
+
+u32 MacroInterpreter::GetRegister(u32 register_id) const {
+    if (register_id != 0) {
+        ASSERT(register_id < registers.size());
+        return registers[register_id];
+    }
+
+    // Register 0 always reads as 0, whatever is stored in the backing array.
+    return 0;
+}
+
+void MacroInterpreter::SetRegister(u32 register_id, u32 value) {
+    // Writes to register 0 are discarded so that it keeps reading as 0; a NOP is encoded as a
+    // store to this register.
+    const bool is_zero_register = register_id == 0;
+    if (!is_zero_register) {
+        ASSERT(register_id < registers.size());
+        registers[register_id] = value;
+    }
+}
+
+/// Stores the raw method address word; its address and increment bitfields are used by Send.
+void MacroInterpreter::SetMethodAddress(u32 address) {
+    method_address.raw = address;
+}
+
+void MacroInterpreter::Send(u32 value) {
+    // Write the value to the register selected by the current method address.
+    maxwell3d.WriteReg(method_address.address, value, 0);
+
+    // Advance the method address by the method increment, ready for the next Send.
+    const u32 next_address = method_address.address.Value() + method_address.increment.Value();
+    method_address.address.Assign(next_address);
+}
+
+/// Returns the Maxwell3D engine's current value for the given method, via GetRegisterValue.
+u32 MacroInterpreter::Read(u32 method) const {
+    return maxwell3d.GetRegisterValue(method);
+}
+
+/**
+ * Decides whether a branch is taken.
+ * @param cond Condition encoded in the branch instruction.
+ * @param value Register value the condition is tested against.
+ * @return true if the branch should be taken.
+ */
+bool MacroInterpreter::EvaluateBranchCondition(BranchCondition cond, u32 value) const {
+    switch (cond) {
+    case BranchCondition::Zero:
+        return value == 0;
+    case BranchCondition::NotZero:
+        return value != 0;
+    }
+    UNREACHABLE();
+    // Flowing off the end of a value-returning function is undefined behavior, so return a
+    // defined value in case UNREACHABLE() does not terminate the program.
+    return true;
+}
+
+} // namespace Tegra
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h
new file mode 100644
index 000000000..a71e359d8
--- /dev/null
+++ b/src/video_core/macro_interpreter.h
@@ -0,0 +1,164 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <vector>
+#include <boost/optional.hpp>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+namespace Engines {
+class Maxwell3D;
+}
+
+/// Interpreter for the macro programs that drive the Maxwell3D engine.
+class MacroInterpreter final {
+public:
+    explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d);
+
+    /**
+     * Executes the macro code with the specified input parameters.
+     * @param code The macro byte code to execute
+     * @param parameters The parameters of the macro. The first parameter is loaded into register
+     *                   1 before execution, so at least one parameter is required.
+     */
+    void Execute(const std::vector<u32>& code, std::vector<u32> parameters);
+
+private:
+    /// Primary operation of an instruction (bits 0-2 of the opcode).
+    enum class Operation : u32 {
+        ALU = 0,
+        AddImmediate = 1,
+        ExtractInsert = 2,
+        ExtractShiftLeftImmediate = 3,
+        ExtractShiftLeftRegister = 4,
+        Read = 5,
+        Unused = 6, // This operation doesn't seem to be a valid encoding.
+        Branch = 7,
+    };
+
+    /// Sub-operation selected when the primary operation is ALU.
+    enum class ALUOperation : u32 {
+        Add = 0,
+        AddWithCarry = 1,
+        Subtract = 2,
+        SubtractWithBorrow = 3,
+        // Operations 4-7 don't seem to be valid encodings.
+        Xor = 8,
+        Or = 9,
+        And = 10,
+        AndNot = 11,
+        Nand = 12
+    };
+
+    /// What to do with the computed result of an instruction.
+    enum class ResultOperation : u32 {
+        IgnoreAndFetch = 0,
+        Move = 1,
+        MoveAndSetMethod = 2,
+        FetchAndSend = 3,
+        MoveAndSend = 4,
+        FetchAndSetMethod = 5,
+        MoveAndSetMethodFetchAndSend = 6,
+        MoveAndSetMethodSend = 7
+    };
+
+    /// Condition tested by a branch instruction.
+    enum class BranchCondition : u32 {
+        Zero = 0,
+        NotZero = 1,
+    };
+
+    /// Bitfield view over one 32-bit macro instruction. Several fields overlap on purpose; which
+    /// ones are meaningful depends on the operation.
+    union Opcode {
+        u32 raw;
+        BitField<0, 3, Operation> operation;
+        BitField<4, 3, ResultOperation> result_operation;
+        BitField<4, 1, BranchCondition> branch_condition;
+        // If set on a branch, then the branch doesn't have a delay slot.
+        BitField<5, 1, u32> branch_annul;
+        BitField<7, 1, u32> is_exit;
+        BitField<8, 3, u32> dst;
+        BitField<11, 3, u32> src_a;
+        BitField<14, 3, u32> src_b;
+        // The signed immediate overlaps the second source operand and the alu operation.
+        BitField<14, 18, s32> immediate;
+
+        BitField<17, 5, ALUOperation> alu_operation;
+
+        // Bitfield instructions data
+        BitField<17, 5, u32> bf_src_bit;
+        BitField<22, 5, u32> bf_size;
+        BitField<27, 5, u32> bf_dst_bit;
+
+        /// Returns a mask with the low `bf_size` bits set.
+        u32 GetBitfieldMask() const {
+            // Shift an unsigned one: bf_size can be as large as 31, and `1 << 31` followed by
+            // `- 1` overflows a signed int.
+            return (1u << bf_size) - 1;
+        }
+    };
+
+    /// Register written by Send: the method address plus the amount it advances after each Send.
+    union MethodAddress {
+        u32 raw;
+        BitField<0, 12, u32> address;
+        BitField<12, 6, u32> increment;
+    };
+
+    /// Resets the execution engine state, zeroing registers, etc.
+    void Reset();
+
+    /**
+     * Executes a single macro instruction located at the current program counter. Returns whether
+     * the interpreter should keep running.
+     * @param code The macro code to execute.
+     * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
+     * previous instruction.
+     */
+    bool Step(const std::vector<u32>& code, bool is_delay_slot);
+
+    /// Calculates the result of an ALU operation. src_a OP src_b;
+    u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const;
+
+    /// Performs the result operation on the input result and stores it in the specified register
+    /// (if necessary).
+    void ProcessResult(ResultOperation operation, u32 reg, u32 result);
+
+    /// Evaluates the branch condition and returns whether the branch should be taken or not.
+    bool EvaluateBranchCondition(BranchCondition cond, u32 value) const;
+
+    /// Reads an opcode at the current program counter location.
+    Opcode GetOpcode(const std::vector<u32>& code) const;
+
+    /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
+    u32 GetRegister(u32 register_id) const;
+
+    /// Sets the register to the input value.
+    void SetRegister(u32 register_id, u32 value);
+
+    /// Sets the method address to use for the next Send instruction.
+    void SetMethodAddress(u32 address);
+
+    /// Calls a GPU Engine method with the input parameter.
+    void Send(u32 value);
+
+    /// Reads a GPU register located at the method address.
+    u32 Read(u32 method) const;
+
+    /// Returns the next parameter in the parameter queue.
+    u32 FetchParameter();
+
+    Engines::Maxwell3D& maxwell3d;
+
+    /// Current program counter, as a byte offset into the macro code.
+    u32 pc = 0;
+    /// Program counter to execute at after the delay slot is executed.
+    boost::optional<u32> delayed_pc;
+
+    static constexpr size_t NumMacroRegisters = 8;
+
+    /// General purpose macro registers.
+    std::array<u32, NumMacroRegisters> registers = {};
+
+    /// Method address to use for the next Send instruction.
+    MethodAddress method_address = {};
+
+    /// Input parameters of the current macro.
+    std::vector<u32> parameters;
+    /// Index of the next parameter that will be fetched by the 'parm' instruction.
+    u32 next_parameter_index = 0;
+};
+} // namespace Tegra
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 6c7bd0826..8239f9aad 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -5,6 +5,7 @@
#pragma once
#include "common/common_types.h"
+#include "video_core/gpu.h"
struct ScreenInfo;
@@ -14,8 +15,8 @@ class RasterizerInterface {
public:
virtual ~RasterizerInterface() {}
- /// Draw the current batch of triangles
- virtual void DrawTriangles() = 0;
+ /// Draw the current batch of vertex arrays
+ virtual void DrawArrays() = 0;
/// Notify rasterizer that the specified Maxwell register has been changed
virtual void NotifyMaxwellRegisterChanged(u32 id) = 0;
@@ -24,14 +25,14 @@ public:
virtual void FlushAll() = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
- virtual void FlushRegion(PAddr addr, u32 size) = 0;
+ virtual void FlushRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated
- virtual void InvalidateRegion(PAddr addr, u32 size) = 0;
+ virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated
- virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0;
+ virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
/// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0
virtual bool AccelerateDisplayTransfer(const void* config) {
@@ -49,7 +50,8 @@ public:
}
/// Attempt to use a faster method to display the framebuffer to screen
- virtual bool AccelerateDisplay(const void* config, PAddr framebuffer_addr, u32 pixel_stride,
+ virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer,
+ VAddr framebuffer_addr, u32 pixel_stride,
ScreenInfo& screen_info) {
return false;
}
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 51e1d45f9..30075b23c 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -5,6 +5,11 @@
#include <atomic>
#include <memory>
#include "video_core/renderer_base.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/video_core.h"
-void RendererBase::RefreshRasterizerSetting() {}
+void RendererBase::RefreshRasterizerSetting() {
+    // Nothing to do once a rasterizer exists; it is only created on the first call.
+    if (rasterizer) {
+        return;
+    }
+
+    rasterizer = std::make_unique<RasterizerOpenGL>();
+}
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 2aba50eda..89a960eaf 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -8,6 +8,8 @@
#include <boost/optional.hpp>
#include "common/assert.h"
#include "common/common_types.h"
+#include "video_core/gpu.h"
+#include "video_core/rasterizer_interface.h"
class EmuWindow;
@@ -16,40 +18,10 @@ public:
/// Used to reference a framebuffer
enum kFramebuffer { kFramebuffer_VirtualXFB = 0, kFramebuffer_EFB, kFramebuffer_Texture };
- /**
- * Struct describing framebuffer metadata
- * TODO(bunnei): This struct belongs in the GPU code, but we don't have a good place for it yet.
- */
- struct FramebufferInfo {
- enum class PixelFormat : u32 {
- ABGR8 = 1,
- };
-
- /**
- * Returns the number of bytes per pixel.
- */
- static u32 BytesPerPixel(PixelFormat format) {
- switch (format) {
- case PixelFormat::ABGR8:
- return 4;
- }
-
- UNREACHABLE();
- }
-
- VAddr address;
- u32 offset;
- u32 width;
- u32 height;
- u32 stride;
- PixelFormat pixel_format;
- bool flip_vertical;
- };
-
virtual ~RendererBase() {}
/// Swap buffers (render frame)
- virtual void SwapBuffers(boost::optional<const FramebufferInfo&> framebuffer_info) = 0;
+ virtual void SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) = 0;
/**
* Set the emulator window to use for renderer
@@ -74,12 +46,16 @@ public:
return m_current_frame;
}
+    /// Returns a non-owning pointer to the rasterizer, or nullptr if none has been created yet.
+    VideoCore::RasterizerInterface* Rasterizer() const {
+        return rasterizer.get();
+    }
+
void RefreshRasterizerSetting();
protected:
+ std::unique_ptr<VideoCore::RasterizerInterface> rasterizer;
f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
int m_current_frame = 0; ///< Current frame, should be set by the renderer
private:
- bool opengl_rasterizer_active = false;
};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 24cfff229..911890f16 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -14,11 +14,16 @@
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/vector_math.h"
+#include "core/core.h"
+#include "core/hle/kernel/process.h"
#include "core/settings.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PixelFormat = SurfaceParams::PixelFormat;
using SurfaceType = SurfaceParams::SurfaceType;
@@ -54,6 +59,8 @@ static void SetShaderUniformBlockBindings(GLuint shader) {
}
RasterizerOpenGL::RasterizerOpenGL() {
+ shader_dirty = true;
+
has_ARB_buffer_storage = false;
has_ARB_direct_state_access = false;
has_ARB_separate_shader_objects = false;
@@ -106,8 +113,6 @@ RasterizerOpenGL::RasterizerOpenGL() {
state.draw.vertex_buffer = stream_buffer->GetHandle();
pipeline.Create();
- vs_input_index_min = 0;
- vs_input_index_max = 0;
state.draw.program_pipeline = pipeline.handle;
state.draw.shader_program = 0;
state.draw.vertex_array = hw_vao.handle;
@@ -120,20 +125,14 @@ RasterizerOpenGL::RasterizerOpenGL() {
glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY);
glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle);
} else {
- UNIMPLEMENTED();
+ UNREACHABLE();
}
accelerate_draw = AccelDraw::Disabled;
glEnable(GL_BLEND);
- // Sync fixed function OpenGL state
- SyncClipEnabled();
- SyncClipCoef();
- SyncCullMode();
- SyncBlendEnabled();
- SyncBlendFuncs();
- SyncBlendColor();
+ LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
}
RasterizerOpenGL::~RasterizerOpenGL() {
@@ -144,47 +143,235 @@ RasterizerOpenGL::~RasterizerOpenGL() {
}
}
-static constexpr std::array<GLenum, 4> vs_attrib_types{
- GL_BYTE, // VertexAttributeFormat::BYTE
- GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE
- GL_SHORT, // VertexAttributeFormat::SHORT
- GL_FLOAT // VertexAttributeFormat::FLOAT
-};
-
void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) {
- UNIMPLEMENTED();
+ const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
+
+ if (is_indexed) {
+ UNREACHABLE();
+ }
+
+ // TODO(bunnei): Add support for 1+ vertex arrays
+ vs_input_size = regs.vertex_buffer.count * regs.vertex_array[0].stride;
}
void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
MICROPROFILE_SCOPE(OpenGL_VAO);
- UNIMPLEMENTED();
+ const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
+ const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager;
+
+ state.draw.vertex_array = hw_vao.handle;
+ state.draw.vertex_buffer = stream_buffer->GetHandle();
+ state.Apply();
+
+ // TODO(bunnei): Add support for 1+ vertex arrays
+ const auto& vertex_array{regs.vertex_array[0]};
+ ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?");
+ ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!");
+ for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) {
+ ASSERT_MSG(!regs.vertex_array[index].enable, "vertex array %d is unimplemented!", index);
+ }
+
+ // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
+ // Enables the first 16 vertex attributes always, as we don't know which ones are actually used
+ // until shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now
+ // to avoid OpenGL errors.
+ for (unsigned index = 0; index < 16; ++index) {
+ auto& attrib = regs.vertex_attrib_format[index];
+ glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
+ attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride,
+ reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset));
+ glEnableVertexAttribArray(index);
+ hw_vao_enabled_attributes[index] = true;
+ }
+
+ // Copy vertex array data
+ const u32 data_size{vertex_array.stride * regs.vertex_buffer.count};
+ const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())};
+ res_cache.FlushRegion(data_addr, data_size, nullptr);
+ Memory::ReadBlock(data_addr, array_ptr, data_size);
+
+ array_ptr += data_size;
+ buffer_offset += data_size;
}
void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) {
MICROPROFILE_SCOPE(OpenGL_VS);
- UNIMPLEMENTED();
+ LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader.");
+ glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_shader->shader.handle);
}
void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) {
MICROPROFILE_SCOPE(OpenGL_FS);
- UNIMPLEMENTED();
+ UNREACHABLE();
}
bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
if (!has_ARB_separate_shader_objects) {
- UNIMPLEMENTED();
+ UNREACHABLE();
return false;
}
accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
- DrawTriangles();
+ DrawArrays();
return true;
}
-void RasterizerOpenGL::DrawTriangles() {
+void RasterizerOpenGL::DrawArrays() {
+ if (accelerate_draw == AccelDraw::Disabled)
+ return;
+
MICROPROFILE_SCOPE(OpenGL_Drawing);
- UNIMPLEMENTED();
+ const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
+
+ // TODO(bunnei): Implement these
+ const bool has_stencil = false;
+ const bool using_color_fb = true;
+ const bool using_depth_fb = false;
+ const MathUtil::Rectangle<s32> viewport_rect{regs.viewport[0].GetRect()};
+
+ const bool write_color_fb =
+ state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
+ state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE;
+
+ const bool write_depth_fb =
+ (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) ||
+ (has_stencil && state.stencil.test_enabled && state.stencil.write_mask != 0);
+
+ Surface color_surface;
+ Surface depth_surface;
+ MathUtil::Rectangle<u32> surfaces_rect;
+ std::tie(color_surface, depth_surface, surfaces_rect) =
+ res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect);
+
+ const u16 res_scale = color_surface != nullptr
+ ? color_surface->res_scale
+ : (depth_surface == nullptr ? 1u : depth_surface->res_scale);
+
+ MathUtil::Rectangle<u32> draw_rect{
+ static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.left) +
+ viewport_rect.left * res_scale,
+ surfaces_rect.left, surfaces_rect.right)), // Left
+ static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
+ viewport_rect.top * res_scale,
+ surfaces_rect.bottom, surfaces_rect.top)), // Top
+ static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.left) +
+ viewport_rect.right * res_scale,
+ surfaces_rect.left, surfaces_rect.right)), // Right
+ static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
+ viewport_rect.bottom * res_scale,
+ surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
+
+ // Bind the framebuffer surfaces
+ BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);
+
+ // Sync the viewport
+ SyncViewport(surfaces_rect, res_scale);
+
+ // TODO(bunnei): Sync framebuffer_scale uniform here
+ // TODO(bunnei): Sync scissorbox uniform(s) here
+ // TODO(bunnei): Sync and bind the texture surfaces
+
+ // Sync and bind the shader
+ if (shader_dirty) {
+ SetShader();
+ shader_dirty = false;
+ }
+
+ // Sync the uniform data
+ if (uniform_block_data.dirty) {
+ glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(UniformData), &uniform_block_data.data);
+ uniform_block_data.dirty = false;
+ }
+
+ // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
+ // scissor test to prevent drawing outside of the framebuffer region
+ state.scissor.enabled = true;
+ state.scissor.x = draw_rect.left;
+ state.scissor.y = draw_rect.bottom;
+ state.scissor.width = draw_rect.GetWidth();
+ state.scissor.height = draw_rect.GetHeight();
+ state.Apply();
+
+ // Draw the vertex batch
+ const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
+ AnalyzeVertexArray(is_indexed);
+ state.draw.vertex_buffer = stream_buffer->GetHandle();
+ state.Apply();
+
+ size_t buffer_size = static_cast<size_t>(vs_input_size);
+ if (is_indexed) {
+ UNREACHABLE();
+ }
+ buffer_size += sizeof(VSUniformData);
+
+ size_t ptr_pos = 0;
+ u8* buffer_ptr;
+ GLintptr buffer_offset;
+ std::tie(buffer_ptr, buffer_offset) =
+ stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4);
+
+ SetupVertexArray(buffer_ptr, buffer_offset);
+ ptr_pos += vs_input_size;
+
+ GLintptr index_buffer_offset = 0;
+ if (is_indexed) {
+ UNREACHABLE();
+ }
+
+ SetupVertexShader(reinterpret_cast<VSUniformData*>(&buffer_ptr[ptr_pos]),
+ buffer_offset + static_cast<GLintptr>(ptr_pos));
+ const GLintptr vs_ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
+ ptr_pos += sizeof(VSUniformData);
+
+ stream_buffer->Unmap();
+
+ const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
+ if (has_ARB_direct_state_access) {
+ glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size);
+ } else {
+ glBindBuffer(GL_COPY_WRITE_BUFFER, handle);
+ glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size);
+ }
+ };
+
+ copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(VSUniformData));
+
+ glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_shader->shader.handle);
+
+ if (is_indexed) {
+ UNREACHABLE();
+ } else {
+ glDrawArrays(MaxwellToGL::PrimitiveTopology(regs.draw.topology), 0,
+ regs.vertex_buffer.count);
+ }
+
+ // Disable scissor test
+ state.scissor.enabled = false;
+
+ accelerate_draw = AccelDraw::Disabled;
+
+ // Unbind textures for potential future use as framebuffer attachments
+ for (auto& texture_unit : state.texture_units) {
+ texture_unit.texture_2d = 0;
+ }
+ state.Apply();
+
+ // Mark framebuffer surfaces as dirty
+ MathUtil::Rectangle<u32> draw_rect_unscaled{
+ draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale,
+ draw_rect.bottom / res_scale};
+
+ if (color_surface != nullptr && write_color_fb) {
+ auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled);
+ res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
+ color_surface);
+ }
+ if (depth_surface != nullptr && write_depth_fb) {
+ auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled);
+ res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
+ depth_surface);
+ }
}
void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 id) {}
@@ -194,17 +381,17 @@ void RasterizerOpenGL::FlushAll() {
res_cache.FlushAll();
}
-void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) {
+void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushRegion(addr, size);
}
-void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) {
+void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.InvalidateRegion(addr, size, nullptr);
}
-void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
+void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushRegion(addr, size);
res_cache.InvalidateRegion(addr, size, nullptr);
@@ -212,58 +399,180 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
- UNIMPLEMENTED();
+ UNREACHABLE();
return true;
}
bool RasterizerOpenGL::AccelerateTextureCopy(const void* config) {
- UNIMPLEMENTED();
+ UNREACHABLE();
return true;
}
bool RasterizerOpenGL::AccelerateFill(const void* config) {
- UNIMPLEMENTED();
+ UNREACHABLE();
return true;
}
-bool RasterizerOpenGL::AccelerateDisplay(const void* config, PAddr framebuffer_addr,
- u32 pixel_stride, ScreenInfo& screen_info) {
- UNIMPLEMENTED();
+bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer,
+ VAddr framebuffer_addr, u32 pixel_stride,
+ ScreenInfo& screen_info) {
+ if (framebuffer_addr == 0) {
+ return false;
+ }
+ MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+
+ SurfaceParams src_params;
+ src_params.addr = framebuffer_addr;
+ src_params.width = std::min(framebuffer.width, pixel_stride);
+ src_params.height = framebuffer.height;
+ src_params.stride = pixel_stride;
+ src_params.is_tiled = false;
+ src_params.pixel_format =
+ SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
+ src_params.UpdateParams();
+
+ MathUtil::Rectangle<u32> src_rect;
+ Surface src_surface;
+ std::tie(src_surface, src_rect) =
+ res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true);
+
+ if (src_surface == nullptr) {
+ return false;
+ }
+
+ u32 scaled_width = src_surface->GetScaledWidth();
+ u32 scaled_height = src_surface->GetScaledHeight();
+
+ screen_info.display_texcoords = MathUtil::Rectangle<float>(
+ (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width,
+ (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width);
+
+ screen_info.display_texture = src_surface->texture.handle;
+
return true;
}
void RasterizerOpenGL::SetShader() {
- UNIMPLEMENTED();
+ // TODO(bunnei): The below sets up a static test shader for passing untransformed vertices to
+ // OpenGL for rendering. This should be removed/replaced when we start emulating Maxwell
+ // shaders.
+
+ static constexpr char vertex_shader[] = R"(
+#version 150 core
+
+in vec2 vert_position;
+in vec2 vert_tex_coord;
+out vec2 frag_tex_coord;
+
+void main() {
+ // Multiply input position by the rotscale part of the matrix and then manually translate by
+ // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
+ // to `vec3(vert_position.xy, 1.0)`
+ gl_Position = vec4(mat2(mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)) * vert_position + mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)[2], 0.0, 1.0);
+ frag_tex_coord = vert_tex_coord;
+}
+)";
+
+ static constexpr char fragment_shader[] = R"(
+#version 150 core
+
+in vec2 frag_tex_coord;
+out vec4 color;
+
+uniform sampler2D color_texture;
+
+void main() {
+ color = vec4(1.0, 0.0, 1.0, 0.0);
+}
+)";
+
+ if (current_shader) {
+ return;
+ }
+
+ LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader.");
+
+ current_shader = &test_shader;
+ if (has_ARB_separate_shader_objects) {
+ test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true);
+ glActiveShaderProgram(pipeline.handle, test_shader.shader.handle);
+ } else {
+ UNREACHABLE();
+ }
+
+ state.draw.shader_program = test_shader.shader.handle;
+ state.Apply();
+
+ if (has_ARB_separate_shader_objects) {
+ state.draw.shader_program = 0;
+ state.Apply();
+ }
+}
+
+void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
+                                               const Surface& depth_surface, bool has_stencil) {
+    // Make our framebuffer object the current draw framebuffer before touching attachments.
+    state.draw.draw_framebuffer = framebuffer.handle;
+    state.Apply();
+
+    // Color attachment; texture 0 is attached when there is no color surface.
+    const auto color_texture = color_surface != nullptr ? color_surface->texture.handle : 0;
+    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
+                           color_texture, 0);
+
+    if (depth_surface == nullptr) {
+        // clear both depth and stencil attachment
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                               0);
+        return;
+    }
+
+    if (has_stencil) {
+        // attach both depth and stencil
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                               depth_surface->texture.handle, 0);
+        return;
+    }
+
+    // attach depth
+    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                           depth_surface->texture.handle, 0);
+    // clear stencil attachment
+    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+}
+
+/**
+ * Copies viewport 0 from the Maxwell3D registers into the OpenGL state.
+ * @param surfaces_rect Rectangle whose left and bottom edges offset the viewport origin.
+ * @param res_scale Factor applied to the viewport position and size.
+ */
+void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale) {
+    // Call GetInstance() through the class name: `Core::System().GetInstance()` would construct
+    // and destroy a temporary Core::System just to reach the accessor.
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const MathUtil::Rectangle<s32> viewport_rect{regs.viewport[0].GetRect()};
+
+    state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left * res_scale;
+    state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale;
+    state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth() * res_scale);
+    state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight() * res_scale);
}
void RasterizerOpenGL::SyncClipEnabled() {
- UNIMPLEMENTED();
+ UNREACHABLE();
}
void RasterizerOpenGL::SyncClipCoef() {
- UNIMPLEMENTED();
+ UNREACHABLE();
}
void RasterizerOpenGL::SyncCullMode() {
- UNIMPLEMENTED();
+ UNREACHABLE();
}
void RasterizerOpenGL::SyncDepthScale() {
- UNIMPLEMENTED();
+ UNREACHABLE();
}
void RasterizerOpenGL::SyncDepthOffset() {
- UNIMPLEMENTED();
+ UNREACHABLE();
}
void RasterizerOpenGL::SyncBlendEnabled() {
- UNIMPLEMENTED();
+ UNREACHABLE();
}
void RasterizerOpenGL::SyncBlendFuncs() {
- UNIMPLEMENTED();
+ UNREACHABLE();
}
void RasterizerOpenGL::SyncBlendColor() {
- UNIMPLEMENTED();
+ UNREACHABLE();
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 893fc530f..fd53e94cd 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -29,19 +29,25 @@ public:
RasterizerOpenGL();
~RasterizerOpenGL() override;
- void DrawTriangles() override;
+ void DrawArrays() override;
void NotifyMaxwellRegisterChanged(u32 id) override;
void FlushAll() override;
- void FlushRegion(PAddr addr, u32 size) override;
- void InvalidateRegion(PAddr addr, u32 size) override;
- void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
+ void FlushRegion(VAddr addr, u64 size) override;
+ void InvalidateRegion(VAddr addr, u64 size) override;
+ void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
bool AccelerateDisplayTransfer(const void* config) override;
bool AccelerateTextureCopy(const void* config) override;
bool AccelerateFill(const void* config) override;
- bool AccelerateDisplay(const void* config, PAddr framebuffer_addr, u32 pixel_stride,
- ScreenInfo& screen_info) override;
+ bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, VAddr framebuffer_addr,
+ u32 pixel_stride, ScreenInfo& screen_info) override;
bool AccelerateDrawBatch(bool is_indexed) override;
+ /// OpenGL shader generated for a given Maxwell register state
+ struct MaxwellShader {
+ /// OpenGL shader resource
+ OGLShader shader;
+ };
+
struct VertexShader {
OGLShader shader;
};
@@ -81,6 +87,13 @@ public:
private:
struct SamplerInfo {};
+ /// Binds the framebuffer color and depth surface
+ void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface,
+ bool has_stencil);
+
+ /// Syncs the viewport to match the guest state
+ void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale);
+
/// Syncs the clip enabled status to match the guest state
void SyncClipEnabled();
@@ -117,6 +130,12 @@ private:
RasterizerCacheOpenGL res_cache;
+ /// Shader used for test rendering - to be removed once we have emulated shaders
+ MaxwellShader test_shader{};
+
+ const MaxwellShader* current_shader{};
+ bool shader_dirty{};
+
struct {
UniformData data;
bool dirty;
@@ -127,7 +146,7 @@ private:
OGLVertexArray hw_vao;
std::array<bool, 16> hw_vao_enabled_attributes;
- std::array<SamplerInfo, 3> texture_samplers;
+ std::array<SamplerInfo, 32> texture_samplers;
static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
std::unique_ptr<OGLStreamBuffer> vertex_buffer;
OGLBuffer uniform_buffer;
@@ -136,8 +155,6 @@ private:
static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024;
std::unique_ptr<OGLStreamBuffer> stream_buffer;
- GLint vs_input_index_min;
- GLint vs_input_index_max;
GLsizeiptr vs_input_size;
void AnalyzeVertexArray(bool is_indexed);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 884637ca5..2ffbd3bab 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -21,9 +21,13 @@
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/vector_math.h"
+#include "core/core.h"
#include "core/frontend/emu_window.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/vm_manager.h"
#include "core/memory.h"
#include "core/settings.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/utils.h"
@@ -107,67 +111,28 @@ static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) {
}
template <bool morton_to_gl, PixelFormat format>
-static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr start, PAddr end) {
+static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
- constexpr u32 tile_size = bytes_per_pixel * 64;
-
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
- static_assert(gl_bytes_per_pixel >= bytes_per_pixel, "");
- gl_buffer += gl_bytes_per_pixel - bytes_per_pixel;
-
- const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size);
- const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size);
- const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size);
-
- ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end));
-
- const u64 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel;
- u32 x = static_cast<u32>((begin_pixel_index % (stride * 8)) / 8);
- u32 y = static_cast<u32>((begin_pixel_index / (stride * 8)) * 8);
-
- gl_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel;
-
- auto glbuf_next_tile = [&] {
- x = (x + 8) % stride;
- gl_buffer += 8 * gl_bytes_per_pixel;
- if (!x) {
- y += 8;
- gl_buffer -= stride * 9 * gl_bytes_per_pixel;
- }
- };
- u8* tile_buffer = Memory::GetPhysicalPointer(start);
-
- if (start < aligned_start && !morton_to_gl) {
- std::array<u8, tile_size> tmp_buf;
- MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer);
- std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start],
- std::min(aligned_start, end) - start);
-
- tile_buffer += aligned_start - start;
- glbuf_next_tile();
- }
-
- const u8* const buffer_end = tile_buffer + aligned_end - aligned_start;
- while (tile_buffer < buffer_end) {
- MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gl_buffer);
- tile_buffer += tile_size;
- glbuf_next_tile();
- }
-
- if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) {
- std::array<u8, tile_size> tmp_buf;
- MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer);
- std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end);
- }
+ // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
+ // configuration for this and perform more generic un/swizzle
+ LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
+ VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
+ Memory::GetPointer(base), gl_buffer, morton_to_gl);
}
-static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> morton_to_gl_fns = {
- MortonCopy<true, PixelFormat::RGBA8>, // 0
- MortonCopy<true, PixelFormat::RGB8>, // 1
- MortonCopy<true, PixelFormat::RGB5A1>, // 2
- MortonCopy<true, PixelFormat::RGB565>, // 3
- MortonCopy<true, PixelFormat::RGBA4>, // 4
+static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 18> morton_to_gl_fns = {
+ MortonCopy<true, PixelFormat::RGBA8>,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
nullptr,
nullptr,
nullptr,
@@ -176,19 +141,19 @@ static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> mo
nullptr,
nullptr,
nullptr,
- nullptr, // 5 - 13
- MortonCopy<true, PixelFormat::D16>, // 14
- nullptr, // 15
- MortonCopy<true, PixelFormat::D24>, // 16
- MortonCopy<true, PixelFormat::D24S8> // 17
};
-static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gl_to_morton_fns = {
- MortonCopy<false, PixelFormat::RGBA8>, // 0
- MortonCopy<false, PixelFormat::RGB8>, // 1
- MortonCopy<false, PixelFormat::RGB5A1>, // 2
- MortonCopy<false, PixelFormat::RGB565>, // 3
- MortonCopy<false, PixelFormat::RGBA4>, // 4
+static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 18> gl_to_morton_fns = {
+ MortonCopy<false, PixelFormat::RGBA8>,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
nullptr,
nullptr,
nullptr,
@@ -197,11 +162,6 @@ static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gl
nullptr,
nullptr,
nullptr,
- nullptr, // 5 - 13
- MortonCopy<false, PixelFormat::D16>, // 14
- nullptr, // 15
- MortonCopy<false, PixelFormat::D24>, // 16
- MortonCopy<false, PixelFormat::D24S8> // 17
};
// Allocate an uninitialized texture of appropriate size and format for the surface
@@ -290,17 +250,17 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec
static bool FillSurface(const Surface& surface, const u8* fill_data,
const MathUtil::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) {
- UNIMPLEMENTED();
- return true;
+ UNREACHABLE();
+ return {};
}
SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const {
SurfaceParams params = *this;
const u32 tiled_size = is_tiled ? 8 : 1;
const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size);
- PAddr aligned_start =
+ VAddr aligned_start =
addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes);
- PAddr aligned_end =
+ VAddr aligned_end =
addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes);
if (aligned_end - aligned_start > stride_tiled_bytes) {
@@ -527,10 +487,10 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
}
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
-void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
+void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) {
ASSERT(type != SurfaceType::Fill);
- const u8* const texture_src_data = Memory::GetPhysicalPointer(addr);
+ u8* const texture_src_data = Memory::GetPointer(addr);
if (texture_src_data == nullptr)
return;
@@ -539,35 +499,30 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
gl_buffer.reset(new u8[gl_buffer_size]);
}
- // TODO: Should probably be done in ::Memory:: and check for other regions too
- if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END)
- load_end = Memory::VRAM_VADDR_END;
-
- if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR)
- load_start = Memory::VRAM_VADDR;
-
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
ASSERT(load_start >= addr && load_end <= end);
- const u32 start_offset = load_start - addr;
+ const u64 start_offset = load_start - addr;
if (!is_tiled) {
ASSERT(type == SurfaceType::Color);
- std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset,
- load_end - load_start);
+ const u32 bytes_per_pixel{GetFormatBpp() >> 3};
+
+ // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
+ // the configuration for this and perform more generic un/swizzle
+ LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
+ VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4,
+ texture_src_data + start_offset, &gl_buffer[start_offset],
+ true);
} else {
- if (type == SurfaceType::Texture) {
- UNIMPLEMENTED();
- } else {
- morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
- load_start, load_end);
- }
+ morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
+ load_start, load_end);
}
}
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
-void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
- u8* const dst_buffer = Memory::GetPhysicalPointer(addr);
+void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) {
+ u8* const dst_buffer = Memory::GetPointer(addr);
if (dst_buffer == nullptr)
return;
@@ -1102,18 +1057,106 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams&
}
Surface RasterizerCacheOpenGL::GetTextureSurface(const void* config) {
- UNIMPLEMENTED();
+ UNREACHABLE();
return {};
}
SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
- bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport_rect) {
- UNIMPLEMENTED();
- return {};
+ bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) {
+ // Returns the cached color surface, the cached depth surface (either may be nullptr) and the
+ // scaled rectangle they share. The surfaces are described by render target 0 and validated
+ // over the viewport area clamped to that target's dimensions.
+ //
+ // GetInstance() is called on the type rather than on a temporary Core::System, so no
+ // throwaway System object is constructed on each call.
+ const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
+ const auto& config = regs.rt[0];
+
+ // TODO(bunnei): This is hard-coded to use just the first render buffer
+ LOG_WARNING(Render_OpenGL, "hard-coded for render target 0!");
+
+ // update resolution_scale_factor and reset cache if changed
+ // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We
+ // need to fix this before making the renderer multi-threaded.
+ static u16 resolution_scale_factor = GetResolutionScaleFactor();
+ if (resolution_scale_factor != GetResolutionScaleFactor()) {
+ resolution_scale_factor = GetResolutionScaleFactor();
+ FlushAll();
+ while (!surface_cache.empty())
+ UnregisterSurface(*surface_cache.begin()->second.begin());
+ }
+
+ MathUtil::Rectangle<u32> viewport_clamped{
+ static_cast<u32>(MathUtil::Clamp(viewport.left, 0, static_cast<s32>(config.width))),
+ static_cast<u32>(MathUtil::Clamp(viewport.top, 0, static_cast<s32>(config.height))),
+ static_cast<u32>(MathUtil::Clamp(viewport.right, 0, static_cast<s32>(config.width))),
+ static_cast<u32>(MathUtil::Clamp(viewport.bottom, 0, static_cast<s32>(config.height)))};
+
+ // get color and depth surfaces
+ SurfaceParams color_params;
+ color_params.is_tiled = true;
+ color_params.res_scale = resolution_scale_factor;
+ color_params.width = config.width;
+ color_params.height = config.height;
+ SurfaceParams depth_params = color_params;
+
+ color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address());
+ color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format);
+ color_params.UpdateParams();
+
+ ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented");
+ // depth_params.addr = config.GetDepthBufferPhysicalAddress();
+ // depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format);
+ // depth_params.UpdateParams();
+
+ auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped);
+ auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped);
+
+ // Make sure that framebuffers don't overlap if both color and depth are being used
+ if (using_color_fb && using_depth_fb &&
+ boost::icl::length(color_vp_interval & depth_vp_interval)) {
+ LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; "
+ "overlapping framebuffers not supported!");
+ using_depth_fb = false;
+ }
+
+ MathUtil::Rectangle<u32> color_rect{};
+ Surface color_surface = nullptr;
+ if (using_color_fb)
+ std::tie(color_surface, color_rect) =
+ GetSurfaceSubRect(color_params, ScaleMatch::Exact, false);
+
+ MathUtil::Rectangle<u32> depth_rect{};
+ Surface depth_surface = nullptr;
+ if (using_depth_fb)
+ std::tie(depth_surface, depth_rect) =
+ GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false);
+
+ MathUtil::Rectangle<u32> fb_rect{};
+ if (color_surface != nullptr && depth_surface != nullptr) {
+ fb_rect = color_rect;
+ // Color and Depth surfaces must have the same dimensions and offsets
+ if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top ||
+ color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) {
+ color_surface = GetSurface(color_params, ScaleMatch::Exact, false);
+ depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false);
+ fb_rect = color_surface->GetScaledRect();
+ }
+ } else if (color_surface != nullptr) {
+ fb_rect = color_rect;
+ } else if (depth_surface != nullptr) {
+ fb_rect = depth_rect;
+ }
+
+ if (color_surface != nullptr) {
+ ValidateSurface(color_surface, boost::icl::first(color_vp_interval),
+ boost::icl::length(color_vp_interval));
+ }
+ if (depth_surface != nullptr) {
+ ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval),
+ boost::icl::length(depth_vp_interval));
+ }
+
+ return std::make_tuple(color_surface, depth_surface, fb_rect);
}
Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) {
- UNIMPLEMENTED();
+ UNREACHABLE();
return {};
}
@@ -1167,7 +1210,7 @@ void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface,
}
}
-void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u64 size) {
+void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, VAddr addr, u64 size) {
if (size == 0)
return;
@@ -1227,7 +1270,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr,
}
}
-void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u64 size, Surface flush_surface) {
+void RasterizerCacheOpenGL::FlushRegion(VAddr addr, u64 size, Surface flush_surface) {
if (size == 0)
return;
@@ -1260,10 +1303,10 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u64 size, Surface flush_surf
}
void RasterizerCacheOpenGL::FlushAll() {
- FlushRegion(0, 0xFFFFFFFF);
+ FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);
}
-void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u64 size, const Surface& region_owner) {
+void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner) {
if (size == 0)
return;
@@ -1356,6 +1399,34 @@ void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}});
}
-void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u64 size, int delta) {
- UNIMPLEMENTED();
+void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
+ // Adjusts the per-page count in cached_pages for every page touched by [addr, addr + size)
+ // by delta, and tells the memory system when a page range becomes cached or uncached.
+
+ // An empty region touches no pages. Bail out first, as the other region functions of this
+ // class do: with addr == 0 the expression `addr + size - 1` below would otherwise wrap around.
+ if (size == 0)
+ return;
+
+ const u64 num_pages =
+ ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1;
+ const u64 page_start = addr >> Memory::PAGE_BITS;
+ const u64 page_end = page_start + num_pages;
+
+ // Interval maps will erase segments if count reaches 0, so if delta is negative we have to
+ // subtract after iterating
+ const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end);
+ if (delta > 0)
+ cached_pages.add({pages_interval, delta});
+
+ for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
+ const auto interval = pair.first & pages_interval;
+ const int count = pair.second;
+
+ const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS;
+ const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS;
+ const u64 interval_size = interval_end_addr - interval_start_addr;
+
+ // count == delta: these pages were uncached before this call's addition.
+ // count == -delta: these pages drop to zero once the subtraction below is applied.
+ if (delta > 0 && count == delta)
+ Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
+ else if (delta < 0 && count == -delta)
+ Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
+ else
+ ASSERT(count >= 0);
+ }
+
+ if (delta < 0)
+ cached_pages.add({pages_interval, delta});
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 17ce0fee7..1f660d30c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -22,15 +22,16 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/math_util.h"
+#include "video_core/gpu.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
struct CachedSurface;
using Surface = std::shared_ptr<CachedSurface>;
using SurfaceSet = std::set<Surface>;
-using SurfaceRegions = boost::icl::interval_set<PAddr>;
-using SurfaceMap = boost::icl::interval_map<PAddr, Surface>;
-using SurfaceCache = boost::icl::interval_map<PAddr, SurfaceSet>;
+using SurfaceRegions = boost::icl::interval_set<VAddr>;
+using SurfaceMap = boost::icl::interval_map<VAddr, Surface>;
+using SurfaceCache = boost::icl::interval_map<VAddr, SurfaceSet>;
using SurfaceInterval = SurfaceCache::interval_type;
static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() &&
@@ -40,7 +41,7 @@ static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval
using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
-using PageMap = boost::icl::interval_map<u32, int>;
+using PageMap = boost::icl::interval_map<u64, int>;
enum class ScaleMatch {
Exact, // only accept same res scale
@@ -115,6 +116,24 @@ struct SurfaceParams {
return GetFormatBpp(pixel_format);
}
+ static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
+ switch (format) {
+ case Tegra::RenderTargetFormat::RGBA8_UNORM:
+ return PixelFormat::RGBA8;
+ default:
+ UNREACHABLE();
+ }
+ }
+
+ static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
+ switch (format) {
+ case Tegra::FramebufferConfig::PixelFormat::ABGR8:
+ return PixelFormat::RGBA8;
+ default:
+ UNREACHABLE();
+ }
+ }
+
static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
SurfaceType a_type = GetFormatType(pixel_format_a);
SurfaceType b_type = GetFormatType(pixel_format_b);
@@ -211,8 +230,8 @@ struct SurfaceParams {
MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const;
MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const;
- PAddr addr = 0;
- PAddr end = 0;
+ VAddr addr = 0;
+ VAddr end = 0;
u64 size = 0;
u32 width = 0;
@@ -257,9 +276,9 @@ struct CachedSurface : SurfaceParams {
std::unique_ptr<u8[]> gl_buffer;
size_t gl_buffer_size = 0;
- // Read/Write data in 3DS memory to/from gl_buffer
- void LoadGLBuffer(PAddr load_start, PAddr load_end);
- void FlushGLBuffer(PAddr flush_start, PAddr flush_end);
+ // Read/Write data in Switch memory to/from gl_buffer
+ void LoadGLBuffer(VAddr load_start, VAddr load_end);
+ void FlushGLBuffer(VAddr flush_start, VAddr flush_end);
// Upload/Download data in gl_buffer in/to this surface's texture
void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
@@ -298,7 +317,7 @@ public:
/// Get the color and depth surfaces based on the framebuffer configuration
SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
- const MathUtil::Rectangle<s32>& viewport_rect);
+ const MathUtil::Rectangle<s32>& viewport);
/// Get a surface that matches the fill config
Surface GetFillSurface(const void* config);
@@ -307,10 +326,10 @@ public:
SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params);
/// Write any cached resources overlapping the region back to memory (if dirty)
- void FlushRegion(PAddr addr, u64 size, Surface flush_surface = nullptr);
+ void FlushRegion(VAddr addr, u64 size, Surface flush_surface = nullptr);
/// Mark region as being invalidated by region_owner (nullptr if 3DS memory)
- void InvalidateRegion(PAddr addr, u64 size, const Surface& region_owner);
+ void InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner);
/// Flush all cached resources tracked by this cache manager
void FlushAll();
@@ -319,7 +338,7 @@ private:
void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface);
/// Update surface's texture for given region when necessary
- void ValidateSurface(const Surface& surface, PAddr addr, u64 size);
+ void ValidateSurface(const Surface& surface, VAddr addr, u64 size);
/// Create a new surface
Surface CreateSurface(const SurfaceParams& params);
@@ -331,7 +350,7 @@ private:
void UnregisterSurface(const Surface& surface);
/// Increase/decrease the number of surface in pages touching the specified region
- void UpdatePagesCachedCount(PAddr addr, u64 size, int delta);
+ void UpdatePagesCachedCount(VAddr addr, u64 size, int delta);
SurfaceCache surface_cache;
PageMap cached_pages;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 0e0ef18cc..564ea8f9e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -26,7 +26,7 @@ public:
sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {}
std::string Decompile() {
- UNIMPLEMENTED();
+ UNREACHABLE();
return {};
}
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index f242bce1d..8f3c98800 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -8,12 +8,12 @@
namespace GLShader {
std::string GenerateVertexShader(const MaxwellVSConfig& config) {
- UNIMPLEMENTED();
+ UNREACHABLE();
return {};
}
std::string GenerateFragmentShader(const MaxwellFSConfig& config) {
- UNIMPLEMENTED();
+ UNREACHABLE();
return {};
}
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index a3ba16761..a6c6204d5 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -38,8 +38,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader,
if (result == GL_TRUE) {
LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]);
} else {
- LOG_ERROR(Render_OpenGL, "Error compiling vertex shader:\n%s",
- &vertex_shader_error[0]);
+ LOG_CRITICAL(Render_OpenGL, "Error compiling vertex shader:\n%s",
+ &vertex_shader_error[0]);
}
}
}
@@ -62,8 +62,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader,
if (result == GL_TRUE) {
LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]);
} else {
- LOG_ERROR(Render_OpenGL, "Error compiling geometry shader:\n%s",
- &geometry_shader_error[0]);
+ LOG_CRITICAL(Render_OpenGL, "Error compiling geometry shader:\n%s",
+ &geometry_shader_error[0]);
}
}
}
@@ -86,8 +86,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader,
if (result == GL_TRUE) {
LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]);
} else {
- LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s",
- &fragment_shader_error[0]);
+ LOG_CRITICAL(Render_OpenGL, "Error compiling fragment shader:\n%s",
+ &fragment_shader_error[0]);
}
}
}
@@ -128,20 +128,20 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader,
if (result == GL_TRUE) {
LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]);
} else {
- LOG_ERROR(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]);
+ LOG_CRITICAL(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]);
}
}
// If the program linking failed at least one of the shaders was probably bad
if (result == GL_FALSE) {
if (vertex_shader) {
- LOG_ERROR(Render_OpenGL, "Vertex shader:\n%s", vertex_shader);
+ LOG_CRITICAL(Render_OpenGL, "Vertex shader:\n%s", vertex_shader);
}
if (geometry_shader) {
- LOG_ERROR(Render_OpenGL, "Geometry shader:\n%s", geometry_shader);
+ LOG_CRITICAL(Render_OpenGL, "Geometry shader:\n%s", geometry_shader);
}
if (fragment_shader) {
- LOG_ERROR(Render_OpenGL, "Fragment shader:\n%s", fragment_shader);
+ LOG_CRITICAL(Render_OpenGL, "Fragment shader:\n%s", fragment_shader);
}
}
ASSERT_MSG(result == GL_TRUE, "Shader not linked");
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 940575dfa..c1f4efc8c 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -85,7 +85,7 @@ public:
struct {
GLuint texture_2d; // GL_TEXTURE_BINDING_2D
GLuint sampler; // GL_SAMPLER_BINDING
- } texture_units[3];
+ } texture_units[32];
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
new file mode 100644
index 000000000..d847317ac
--- /dev/null
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -0,0 +1,50 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <glad/glad.h>
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/maxwell_3d.h"
+
+namespace MaxwellToGL {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
+ switch (attrib.type) {
+ case Maxwell::VertexAttribute::Type::UnsignedNorm: {
+
+ switch (attrib.size) {
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return GL_UNSIGNED_BYTE;
+ }
+
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size=%s", attrib.SizeString().c_str());
+ UNREACHABLE();
+ return {};
+ }
+
+ case Maxwell::VertexAttribute::Type::Float:
+ return GL_FLOAT;
+ }
+
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type=%s", attrib.TypeString().c_str());
+ UNREACHABLE();
+ return {};
+}
+
+/// Translates a Maxwell primitive topology into the matching OpenGL draw mode. Only
+/// TriangleStrip is handled; any other value is logged and hits UNREACHABLE().
+inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
+ switch (topology) {
+ case Maxwell::PrimitiveTopology::TriangleStrip:
+ return GL_TRIANGLE_STRIP;
+ }
+ // Cast explicitly so the argument type matches the printf-style conversion specifier instead
+ // of passing the enumeration value itself through the variadic argument list
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented primitive topology=%u",
+ static_cast<u32>(topology));
+ UNREACHABLE();
+ return {};
+}
+
+} // namespace MaxwellToGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 65d38ade5..78b50b227 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -20,6 +20,7 @@
#include "core/settings.h"
#include "core/tracer/recorder.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core/utils.h"
#include "video_core/video_core.h"
static const char vertex_shader[] = R"(
@@ -98,197 +99,88 @@ RendererOpenGL::RendererOpenGL() = default;
RendererOpenGL::~RendererOpenGL() = default;
/// Swap buffers (render frame)
-void RendererOpenGL::SwapBuffers(boost::optional<const FramebufferInfo&> framebuffer_info) {
+void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) {
+ Core::System::GetInstance().perf_stats.EndSystemFrame();
+
// Maintain the rasterizer's state as a priority
OpenGLState prev_state = OpenGLState::GetCurState();
state.Apply();
- if (framebuffer_info != boost::none) {
- // If framebuffer_info is provided, reload it from memory to a texture
- if (screen_info.texture.width != (GLsizei)framebuffer_info->width ||
- screen_info.texture.height != (GLsizei)framebuffer_info->height ||
- screen_info.texture.pixel_format != framebuffer_info->pixel_format) {
+ if (framebuffer != boost::none) {
+ // If framebuffer is provided, reload it from memory to a texture
+ if (screen_info.texture.width != (GLsizei)framebuffer->width ||
+ screen_info.texture.height != (GLsizei)framebuffer->height ||
+ screen_info.texture.pixel_format != framebuffer->pixel_format) {
// Reallocate texture if the framebuffer size has changed.
// This is expected to not happen very often and hence should not be a
// performance problem.
- ConfigureFramebufferTexture(screen_info.texture, *framebuffer_info);
+ ConfigureFramebufferTexture(screen_info.texture, *framebuffer);
}
- LoadFBToScreenInfo(*framebuffer_info, screen_info);
- }
-
- DrawScreens();
- Core::System::GetInstance().perf_stats.EndSystemFrame();
+ // Load the framebuffer from memory, draw it to the screen, and swap buffers
+ LoadFBToScreenInfo(*framebuffer, screen_info);
+ DrawScreen();
+ render_window->SwapBuffers();
+ }
- // Swap buffers
render_window->PollEvents();
- render_window->SwapBuffers();
Core::System::GetInstance().frame_limiter.DoFrameLimiting(CoreTiming::GetGlobalTimeUs());
Core::System::GetInstance().perf_stats.BeginSystemFrame();
+ // Restore the rasterizer state
prev_state.Apply();
RefreshRasterizerSetting();
}
-static inline u32 MortonInterleave128(u32 x, u32 y) {
- // 128x128 Z-Order coordinate from 2D coordinates
- static constexpr u32 xlut[] = {
- 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042,
- 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809,
- 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000,
- 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043,
- 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a,
- 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001,
- 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048,
- 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b,
- 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002,
- 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049,
- 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840,
- 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003,
- 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a,
- 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841,
- 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008,
- 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b,
- 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842,
- 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009,
- 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800,
- 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843,
- 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a,
- 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801,
- 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848,
- 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b,
- 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802,
- 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849,
- 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040,
- 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803,
- 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a,
- 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041,
- 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808,
- 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b,
- 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042,
- 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809,
- 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b,
- };
- static constexpr u32 ylut[] = {
- 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090,
- 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124,
- 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200,
- 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294,
- 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330,
- 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404,
- 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0,
- 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534,
- 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610,
- 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4,
- 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780,
- 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014,
- 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0,
- 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184,
- 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220,
- 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4,
- 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390,
- 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424,
- 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500,
- 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594,
- 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630,
- 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704,
- 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0,
- 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034,
- 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110,
- 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4,
- 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280,
- 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314,
- 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0,
- 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484,
- 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520,
- 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4,
- 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690,
- 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724,
- 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4,
- };
- return xlut[x % 128] + ylut[y % 128];
-}
-
-static inline u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
- // Calculates the offset of the position of the pixel in Morton order
- // Framebuffer images are split into 128x128 tiles.
-
- const unsigned int block_height = 128;
- const unsigned int coarse_x = x & ~127;
-
- u32 i = MortonInterleave128(x, y);
-
- const unsigned int offset = coarse_x * block_height;
-
- return (i + offset) * bytes_per_pixel;
-}
-
-static void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 gl_bytes_per_pixel,
- u8* morton_data, u8* gl_data, bool morton_to_gl) {
- u8* data_ptrs[2];
- for (unsigned y = 0; y < height; ++y) {
- for (unsigned x = 0; x < width; ++x) {
- const u32 coarse_y = y & ~127;
- u32 morton_offset =
- GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
- u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
-
- data_ptrs[morton_to_gl] = morton_data + morton_offset;
- data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
-
- memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
- }
- }
-}
-
/**
* Loads framebuffer from emulated memory into the active OpenGL texture.
*/
-void RendererOpenGL::LoadFBToScreenInfo(const FramebufferInfo& framebuffer_info,
+void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer,
ScreenInfo& screen_info) {
- const u32 bpp{FramebufferInfo::BytesPerPixel(framebuffer_info.pixel_format)};
- const u32 size_in_bytes{framebuffer_info.stride * framebuffer_info.height * bpp};
+ const u32 bytes_per_pixel{Tegra::FramebufferConfig::BytesPerPixel(framebuffer.pixel_format)};
+ const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
+ const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
- MortonCopyPixels128(framebuffer_info.width, framebuffer_info.height, bpp, 4,
- Memory::GetPointer(framebuffer_info.address), gl_framebuffer_data.data(),
- true);
-
- LOG_TRACE(Render_OpenGL, "0x%08x bytes from 0x%llx(%dx%d), fmt %x", size_in_bytes,
- framebuffer_info.address, framebuffer_info.width, framebuffer_info.height,
- (int)framebuffer_info.pixel_format);
+ // Framebuffer orientation handling
+ framebuffer_transform_flags = framebuffer.transform_flags;
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default
// only allows rows to have a memory alignement of 4.
- ASSERT(framebuffer_info.stride % 4 == 0);
+ ASSERT(framebuffer.stride % 4 == 0);
- framebuffer_flip_vertical = framebuffer_info.flip_vertical;
+ if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride,
+ screen_info)) {
+ // Reset the screen info's display texture to its own permanent texture
+ screen_info.display_texture = screen_info.texture.resource.handle;
+ screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
- // Reset the screen info's display texture to its own permanent texture
- screen_info.display_texture = screen_info.texture.resource.handle;
- screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
+ Rasterizer()->FlushRegion(framebuffer_addr, size_in_bytes);
- // Memory::RasterizerFlushRegion(framebuffer_info.address, size_in_bytes);
+ VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4,
+ Memory::GetPointer(framebuffer_addr),
+ gl_framebuffer_data.data(), true);
- state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
- state.Apply();
+ state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
+ state.Apply();
- glActiveTexture(GL_TEXTURE0);
- glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)framebuffer_info.stride);
+ glActiveTexture(GL_TEXTURE0);
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
- // Update existing texture
- // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
- // they differ from the LCD resolution.
- // TODO: Applications could theoretically crash Citra here by specifying too large
- // framebuffer sizes. We should make sure that this cannot happen.
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer_info.width, framebuffer_info.height,
- screen_info.texture.gl_format, screen_info.texture.gl_type,
- gl_framebuffer_data.data());
+ // Update existing texture
+ // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
+ // they differ from the LCD resolution.
+ // TODO: Applications could theoretically crash yuzu here by specifying too large
+ // framebuffer sizes. We should make sure that this cannot happen.
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
+ screen_info.texture.gl_format, screen_info.texture.gl_type,
+ gl_framebuffer_data.data());
- glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
- state.texture_units[0].texture_2d = 0;
- state.Apply();
+ state.texture_units[0].texture_2d = 0;
+ state.Apply();
+ }
}
/**
@@ -372,14 +264,14 @@ void RendererOpenGL::InitOpenGLObjects() {
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
- const FramebufferInfo& framebuffer_info) {
+ const Tegra::FramebufferConfig& framebuffer) {
- texture.width = framebuffer_info.width;
- texture.height = framebuffer_info.height;
+ texture.width = framebuffer.width;
+ texture.height = framebuffer.height;
GLint internal_format;
- switch (framebuffer_info.pixel_format) {
- case FramebufferInfo::PixelFormat::ABGR8:
+ switch (framebuffer.pixel_format) {
+ case Tegra::FramebufferConfig::PixelFormat::ABGR8:
// Use RGBA8 and swap in the fragment shader
internal_format = GL_RGBA;
texture.gl_format = GL_RGBA;
@@ -387,7 +279,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
gl_framebuffer_data.resize(texture.width * texture.height * 4);
break;
default:
- UNIMPLEMENTED();
+ UNREACHABLE();
}
state.texture_units[0].texture_2d = texture.resource.handle;
@@ -401,11 +293,22 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
state.Apply();
}
-void RendererOpenGL::DrawSingleScreen(const ScreenInfo& screen_info, float x, float y, float w,
- float h) {
+void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w,
+ float h) {
const auto& texcoords = screen_info.display_texcoords;
- const auto& left = framebuffer_flip_vertical ? texcoords.right : texcoords.left;
- const auto& right = framebuffer_flip_vertical ? texcoords.left : texcoords.right;
+ auto left = texcoords.left;
+ auto right = texcoords.right;
+ if (framebuffer_transform_flags != Tegra::FramebufferConfig::TransformFlags::Unset)
+ if (framebuffer_transform_flags == Tegra::FramebufferConfig::TransformFlags::FlipV) {
+ // Flip the framebuffer vertically
+ left = texcoords.right;
+ right = texcoords.left;
+ } else {
+ // Other transformations are unsupported
+ LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags=%d",
+ framebuffer_transform_flags);
+ UNIMPLEMENTED();
+ }
std::array<ScreenRectVertex, 4> vertices = {{
ScreenRectVertex(x, y, texcoords.top, right),
@@ -427,7 +330,7 @@ void RendererOpenGL::DrawSingleScreen(const ScreenInfo& screen_info, float x, fl
/**
* Draws the emulated screens to the emulator window.
*/
-void RendererOpenGL::DrawScreens() {
+void RendererOpenGL::DrawScreen() {
const auto& layout = render_window->GetFramebufferLayout();
const auto& screen = layout.screen;
@@ -443,8 +346,8 @@ void RendererOpenGL::DrawScreens() {
glActiveTexture(GL_TEXTURE0);
glUniform1i(uniform_color_texture, 0);
- DrawSingleScreen(screen_info, (float)screen.left, (float)screen.top, (float)screen.GetWidth(),
- (float)screen.GetHeight());
+ DrawScreenTriangles(screen_info, (float)screen.left, (float)screen.top,
+ (float)screen.GetWidth(), (float)screen.GetHeight());
m_current_frame++;
}
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 05bb3c5cf..fffd0f9f4 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -21,7 +21,7 @@ struct TextureInfo {
GLsizei height;
GLenum gl_format;
GLenum gl_type;
- RendererBase::FramebufferInfo::PixelFormat pixel_format;
+ Tegra::FramebufferConfig::PixelFormat pixel_format;
};
/// Structure used for storing information about the display target for each 3DS screen
@@ -37,7 +37,7 @@ public:
~RendererOpenGL() override;
/// Swap buffers (render frame)
- void SwapBuffers(boost::optional<const FramebufferInfo&> framebuffer_info) override;
+ void SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) override;
/**
* Set the emulator window to use for renderer
@@ -53,13 +53,14 @@ public:
private:
void InitOpenGLObjects();
- void ConfigureFramebufferTexture(TextureInfo& texture, const FramebufferInfo& framebuffer_info);
- void DrawScreens();
- void DrawSingleScreen(const ScreenInfo& screen_info, float x, float y, float w, float h);
+ void ConfigureFramebufferTexture(TextureInfo& texture,
+ const Tegra::FramebufferConfig& framebuffer);
+ void DrawScreen();
+ void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h);
void UpdateFramerate();
// Loads framebuffer from emulated memory into the display information structure
- void LoadFBToScreenInfo(const FramebufferInfo& framebuffer_info, ScreenInfo& screen_info);
+ void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer, ScreenInfo& screen_info);
// Fills active OpenGL texture with the given RGBA color.
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture);
@@ -87,6 +88,6 @@ private:
GLuint attrib_position;
GLuint attrib_tex_coord;
- /// Flips the framebuffer vertically when true
- bool framebuffer_flip_vertical;
+ /// Used for transforming the framebuffer orientation
+ Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
};
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
new file mode 100644
index 000000000..2e87281eb
--- /dev/null
+++ b/src/video_core/textures/decoders.cpp
@@ -0,0 +1,105 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include "common/assert.h"
+#include "video_core/textures/decoders.h"
+#include "video_core/textures/texture.h"
+
+namespace Tegra {
+namespace Texture {
+
+/**
+ * Computes the byte offset of pixel (x, y) inside a swizzled texture.
+ * The formula is taken from the Tegra X1 TRM.
+ *
+ * @param x Horizontal pixel coordinate.
+ * @param y Vertical pixel coordinate.
+ * @param image_width Width of the image in pixels.
+ * @param bytes_per_pixel Size of one pixel in bytes.
+ * @param block_height Block height, in units of 8-row GOBs.
+ * @return Byte offset of the pixel from the start of the swizzled data.
+ */
+static u32 GetSwizzleOffset(u32 x, u32 y, u32 image_width, u32 bytes_per_pixel, u32 block_height) {
+    const u32 gobs_per_row = image_width * bytes_per_pixel / 64;
+    const u32 rows_per_block = 8 * block_height;
+    const u32 x_bytes = x * bytes_per_pixel;
+
+    // Start of the 512-byte GOB that contains the pixel.
+    const u32 block_row_base = (y / rows_per_block) * 512 * block_height * gobs_per_row;
+    const u32 block_column_base = (x_bytes / 64) * 512 * block_height;
+    const u32 gob_within_block = (y % rows_per_block / 8) * 512;
+    const u32 gob_base = 0 + block_row_base + block_column_base + gob_within_block;
+
+    // Position of the pixel inside that GOB.
+    const u32 offset_in_gob = ((x_bytes % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
+                              ((x_bytes % 32) / 16) * 32 + (y % 2) * 16 + (x_bytes % 16);
+
+    return gob_base + offset_in_gob;
+}
+
+/**
+ * Copies an image pixel by pixel between a swizzled buffer and a linear buffer.
+ *
+ * @param width Number of pixels per row.
+ * @param height Number of rows.
+ * @param bytes_per_pixel Bytes copied per pixel; also used for the swizzled addressing.
+ * @param out_bytes_per_pixel Pixel pitch, in bytes, of the linear buffer.
+ * @param swizzled_data Buffer in swizzled layout.
+ * @param unswizzled_data Buffer in linear (row-major) layout.
+ * @param unswizzle True to copy swizzled -> linear, false to copy linear -> swizzled.
+ * @param block_height Block height forwarded to GetSwizzleOffset.
+ */
+static void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
+                             u8* swizzled_data, u8* unswizzled_data, bool unswizzle,
+                             u32 block_height) {
+    for (unsigned row = 0; row < height; ++row) {
+        for (unsigned col = 0; col < width; ++col) {
+            const u32 swizzled_index =
+                GetSwizzleOffset(col, row, width, bytes_per_pixel, block_height);
+            const u32 linear_index = (col + row * width) * out_bytes_per_pixel;
+
+            u8* const swizzled_pixel = swizzled_data + swizzled_index;
+            u8* const linear_pixel = &unswizzled_data[linear_index];
+
+            if (unswizzle) {
+                std::memcpy(linear_pixel, swizzled_pixel, bytes_per_pixel);
+            } else {
+                std::memcpy(swizzled_pixel, linear_pixel, bytes_per_pixel);
+            }
+        }
+    }
+}
+
+/**
+ * Returns the number of bytes occupied by one "pixel" of the given texture format.
+ *
+ * @param format Texture format to query.
+ * @return 8 for DXT1 (where a "pixel" is a whole 4x4 tile), 4 for A8R8G8B8, and 0 for any
+ *         format that is not implemented.
+ */
+u32 BytesPerPixel(TextureFormat format) {
+    switch (format) {
+    case TextureFormat::DXT1:
+        // In this case a 'pixel' actually refers to a 4x4 tile.
+        return 8;
+    case TextureFormat::A8R8G8B8:
+        return 4;
+    default:
+        UNIMPLEMENTED_MSG("Format not implemented");
+        // Flowing off the end of a non-void function is undefined behaviour, so return a
+        // defined value in case the assertion above does not terminate the program.
+        return 0;
+    }
+}
+
+/**
+ * Reads a swizzled texture from memory and returns it in linear layout, keeping its format.
+ *
+ * @param address Address of the swizzled texture data, resolved with Memory::GetPointer.
+ * @param format Texture format; selects the bytes per pixel and the copy granularity.
+ * @param width Texture width in pixels.
+ * @param height Texture height in pixels.
+ * @return Buffer of width * height * BytesPerPixel(format) bytes holding the linear data.
+ */
+std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height) {
+    static constexpr u32 DefaultBlockHeight = 16;
+
+    u8* const swizzled = Memory::GetPointer(address);
+    const u32 pixel_size = BytesPerPixel(format);
+
+    std::vector<u8> linear(width * height * pixel_size);
+
+    if (format == TextureFormat::DXT1) {
+        // In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values.
+        CopySwizzledData(width / 4, height / 4, pixel_size, pixel_size, swizzled, linear.data(),
+                         true, DefaultBlockHeight);
+    } else if (format == TextureFormat::A8R8G8B8) {
+        CopySwizzledData(width, height, pixel_size, pixel_size, swizzled, linear.data(), true,
+                         DefaultBlockHeight);
+    } else {
+        UNIMPLEMENTED_MSG("Format not implemented");
+    }
+
+    return linear;
+}
+
+/**
+ * Converts unswizzled texture data to the output representation.
+ * No decoding is implemented yet: DXT1 and A8R8G8B8 data is returned as an unmodified copy.
+ *
+ * @param texture_data Unswizzled texture bytes.
+ * @param format Format of texture_data.
+ * @param width Texture width (currently unused).
+ * @param height Texture height (currently unused).
+ * @return A copy of texture_data for the handled formats, otherwise an empty vector.
+ */
+std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
+                              u32 height) {
+    // TODO(Subv): Implement.
+    const bool is_handled =
+        format == TextureFormat::DXT1 || format == TextureFormat::A8R8G8B8;
+    if (!is_handled) {
+        UNIMPLEMENTED_MSG("Format not implemented");
+        return {};
+    }
+
+    // TODO(Subv): For the time being just forward the same data without any decoding.
+    return texture_data;
+}
+
+} // namespace Texture
+} // namespace Tegra
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
new file mode 100644
index 000000000..0c21694ff
--- /dev/null
+++ b/src/video_core/textures/decoders.h
@@ -0,0 +1,26 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/textures/texture.h"
+
+namespace Tegra {
+namespace Texture {
+
+/**
+ * Unswizzles a swizzled texture without changing its format.
+ *
+ * @param address Address of the swizzled texture data in emulated memory.
+ * @param format Texture format; determines the bytes per pixel (per 4x4 tile for DXT1).
+ * @param width Texture width in pixels.
+ * @param height Texture height in pixels.
+ * @return The texture data in linear layout.
+ */
+std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height);
+
+/**
+ * Decodes an unswizzled texture into an A8R8G8B8 texture.
+ * NOTE: decoding is not implemented yet; DXT1 and A8R8G8B8 input is currently returned as an
+ * unmodified copy.
+ *
+ * @param texture_data Unswizzled texture bytes.
+ * @param format Format of texture_data.
+ * @param width Texture width.
+ * @param height Texture height.
+ * @return The resulting texture bytes.
+ */
+std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
+ u32 height);
+
+} // namespace Texture
+} // namespace Tegra
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
new file mode 100644
index 000000000..07936f8a3
--- /dev/null
+++ b/src/video_core/textures/texture.h
@@ -0,0 +1,137 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra {
+namespace Texture {
+
+/// Texture formats known to this header. The enumerator values are the raw encodings held in
+/// the TICEntry::format bit field.
+enum class TextureFormat : u32 {
+ A8R8G8B8 = 8,
+ DXT1 = 0x24,
+};
+
+/// Kind of texture described by a TICEntry. The enumerator values are the raw encodings held
+/// in the TICEntry::texture_type bit field.
+enum class TextureType : u32 {
+ Texture1D = 0,
+ Texture2D = 1,
+ Texture3D = 2,
+ TextureCubemap = 3,
+ Texture1DArray = 4,
+ Texture2DArray = 5,
+ Texture1DBuffer = 6,
+ Texture2DNoMipmap = 7,
+ TextureCubeArray = 8,
+};
+
+/// Header version of a TICEntry. The enumerator values are the raw encodings held in the
+/// TICEntry::header_version bit field.
+enum class TICHeaderVersion : u32 {
+ OneDBuffer = 0,
+ PitchColorKey = 1,
+ Pitch = 2,
+ BlockLinear = 3,
+ BlockLinearColorKey = 4,
+};
+
+/// 32-bit texture handle that packs two identifiers into one word.
+/// NOTE(review): presumably tic_id / tsc_id index the TICEntry / TSCEntry tables - confirm
+/// against the code that consumes the handle.
+union TextureHandle {
+ // Whole handle as a single 32-bit value.
+ u32 raw;
+ // Assuming BitField<position, bits, type>: the low 20 bits of the handle.
+ BitField<0, 20, u32> tic_id;
+ // Assuming BitField<position, bits, type>: the upper 12 bits of the handle.
+ BitField<20, 12, u32> tsc_id;
+};
+static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
+
+/// Texture descriptor entry. The layout is fixed at 0x20 bytes (checked by the static_assert
+/// below), so fields must not be added, removed or reordered without adjusting the padding.
+/// Bit positions below assume BitField<position, bits, type>.
+struct TICEntry {
+ // Word 0: texture format plus one 3-bit type value per colour component.
+ union {
+ u32 raw;
+ BitField<0, 7, TextureFormat> format;
+ BitField<7, 3, u32> r_type;
+ BitField<10, 3, u32> g_type;
+ BitField<13, 3, u32> b_type;
+ BitField<16, 3, u32> a_type;
+ };
+ // Low 32 bits of the texture address.
+ u32 address_low;
+ // Word 2: high 16 bits of the texture address and the header version.
+ union {
+ BitField<0, 16, u32> address_high;
+ BitField<21, 3, TICHeaderVersion> header_version;
+ };
+ INSERT_PADDING_BYTES(4);
+ // Word 4: texture width (stored minus one) and the texture type.
+ union {
+ BitField<0, 16, u32> width_minus_1;
+ BitField<23, 4, TextureType> texture_type;
+ };
+ // Texture height, stored minus one.
+ u16 height_minus_1;
+ INSERT_PADDING_BYTES(10);
+
+ /// Returns the texture address assembled from address_high (shifted left by 32) and
+ /// address_low.
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
+ }
+
+ /// Returns the texture width, i.e. the stored width_minus_1 value plus one.
+ u32 Width() const {
+ return width_minus_1 + 1;
+ }
+
+ /// Returns the texture height, i.e. the stored height_minus_1 value plus one.
+ u32 Height() const {
+ return height_minus_1 + 1;
+ }
+};
+static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size");
+
+/// Texture coordinate wrap modes. The enumerator values are the raw encodings held in the
+/// TSCEntry wrap_u / wrap_v / wrap_p bit fields.
+enum class WrapMode : u32 {
+ Wrap = 0,
+ Mirror = 1,
+ ClampToEdge = 2,
+ Border = 3,
+ ClampOGL = 4,
+ MirrorOnceClampToEdge = 5,
+ MirrorOnceBorder = 6,
+ MirrorOnceClampOGL = 7,
+};
+
+/// Filter modes used by the TSCEntry mag_filter and min_filter bit fields. Note that the
+/// encodings start at 1, not 0.
+enum class TextureFilter : u32 {
+ Nearest = 1,
+ Linear = 2,
+};
+
+/// Mipmap filter modes used by the TSCEntry mip_filter bit field. Note that the encodings
+/// start at 1, not 0.
+enum class TextureMipmapFilter : u32 {
+ None = 1,
+ Nearest = 2,
+ Linear = 3,
+};
+
+/// Texture sampler descriptor entry. The layout is fixed at 0x20 bytes (checked by the
+/// static_assert below), so fields must not be added, removed or reordered without adjusting
+/// the padding. Bit positions below assume BitField<position, bits, type>.
+struct TSCEntry {
+ // Word 0: wrap mode per coordinate and the depth-compare settings.
+ union {
+ BitField<0, 3, WrapMode> wrap_u;
+ BitField<3, 3, WrapMode> wrap_v;
+ BitField<6, 3, WrapMode> wrap_p;
+ BitField<9, 1, u32> depth_compare_enabled;
+ BitField<10, 3, u32> depth_compare_func;
+ };
+ // Word 1: magnification, minification and mipmap filters.
+ union {
+ BitField<0, 2, TextureFilter> mag_filter;
+ BitField<4, 2, TextureFilter> min_filter;
+ BitField<6, 2, TextureMipmapFilter> mip_filter;
+ };
+ INSERT_PADDING_BYTES(8);
+ // Border colour, one 32-bit word per component.
+ // NOTE(review): the component encoding (float vs. integer) is not visible here - confirm.
+ u32 border_color_r;
+ u32 border_color_g;
+ u32 border_color_b;
+ u32 border_color_a;
+};
+static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
+
+/// Bundles a TICEntry and a TSCEntry together with an index and an enabled flag.
+/// NOTE(review): presumably describes one bound texture unit - confirm against the code that
+/// fills this structure.
+struct FullTextureInfo {
+ u32 index;
+ TICEntry tic;
+ TSCEntry tsc;
+ bool enabled;
+};
+
+/// Returns the number of bytes per pixel of the input texture format.
+/// For DXT1 a "pixel" refers to a whole 4x4 tile (8 bytes); A8R8G8B8 uses 4 bytes per pixel.
+u32 BytesPerPixel(TextureFormat format);
+
+} // namespace Texture
+} // namespace Tegra
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index d94a10417..be0f7e22b 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -49,4 +49,116 @@ static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
return (i + offset) * bytes_per_pixel;
}
+/**
+ * Returns the Z-order (Morton) index of a pixel inside its 128x128 tile.
+ * Only the low 7 bits of each coordinate are used (x % 128, y % 128).
+ *
+ * @param x Horizontal pixel coordinate.
+ * @param y Vertical pixel coordinate.
+ * @return Interleaved index within the tile, in pixels.
+ */
+static inline u32 MortonInterleave128(u32 x, u32 y) {
+    // 128x128 Z-Order coordinate from 2D coordinates
+    const u32 tile_x = x % 128;
+    const u32 tile_y = y % 128;
+
+    // x: bits 0-1 stay in place, bit 2 -> bit 3, bit 3 -> bit 6, bits 4-6 -> bits 11-13.
+    const u32 x_part = (tile_x & 0x03) | ((tile_x & 0x04) << 1) | ((tile_x & 0x08) << 3) |
+                       ((tile_x & 0x70) << 7);
+
+    // y: bit 0 -> bit 2, bits 1-2 -> bits 4-5, bits 3-6 -> bits 7-10.
+    const u32 y_part = ((tile_y & 0x01) << 2) | ((tile_y & 0x06) << 3) | ((tile_y & 0x78) << 4);
+
+    // The two parts occupy disjoint bits, so adding them simply merges them.
+    return x_part + y_part;
+}
+
+/**
+ * Calculates the byte offset of a pixel stored in Morton order.
+ * Framebuffer images are split into 128x128 tiles; this covers the position inside the tile
+ * and the horizontal tile offset (the vertical tile offset is not included).
+ *
+ * @param x Horizontal pixel coordinate.
+ * @param y Vertical pixel coordinate.
+ * @param bytes_per_pixel Size of one pixel in bytes.
+ * @return Byte offset of the pixel.
+ */
+static inline u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
+    const unsigned int tile_height = 128;
+
+    // Pixels contained in the tiles to the left of the one holding x.
+    const unsigned int preceding_tiles = (x & ~127) * tile_height;
+
+    const u32 index_in_tile = MortonInterleave128(x, y);
+    return (index_in_tile + preceding_tiles) * bytes_per_pixel;
+}
+
+/**
+ * Copies an image pixel by pixel between a Morton-ordered buffer (128x128 tiles) and a linear
+ * buffer whose rows are stored bottom-up.
+ *
+ * @param width Image width in pixels.
+ * @param height Image height in pixels.
+ * @param bytes_per_pixel Bytes copied per pixel; also the pixel size of the Morton buffer.
+ * @param gl_bytes_per_pixel Pixel pitch, in bytes, of the linear buffer.
+ * @param morton_data Buffer in Morton order.
+ * @param gl_data Linear buffer.
+ * @param morton_to_gl True to copy Morton -> linear, false to copy linear -> Morton.
+ */
+static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel,
+                                       u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data,
+                                       bool morton_to_gl) {
+    for (unsigned row = 0; row < height; ++row) {
+        // Byte offset of the band of 128 rows that contains this row.
+        const u32 band_base = (row & ~127) * width * bytes_per_pixel;
+        // The linear image is vertically flipped relative to the Morton image.
+        const u32 flipped_row = height - 1 - row;
+
+        for (unsigned col = 0; col < width; ++col) {
+            const u32 morton_index = GetMortonOffset128(col, row, bytes_per_pixel) + band_base;
+            const u32 linear_index = (col + flipped_row * width) * gl_bytes_per_pixel;
+
+            u8* const morton_pixel = morton_data + morton_index;
+            u8* const linear_pixel = &gl_data[linear_index];
+
+            if (morton_to_gl) {
+                memcpy(linear_pixel, morton_pixel, bytes_per_pixel);
+            } else {
+                memcpy(morton_pixel, linear_pixel, bytes_per_pixel);
+            }
+        }
+    }
+}
+
} // namespace VideoCore
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 864691baa..289140f31 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -26,7 +26,7 @@ bool Init(EmuWindow* emu_window) {
if (g_renderer->Init()) {
LOG_DEBUG(Render, "initialized OK");
} else {
- LOG_ERROR(Render, "initialization failed !");
+ LOG_CRITICAL(Render, "initialization failed !");
return false;
}
return true;
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 1fd90b9d0..37da62436 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -15,6 +15,8 @@ class RendererBase;
namespace VideoCore {
+/// Enumerates the renderer kinds: Software and OpenGL.
+/// NOTE(review): not referenced in the visible part of this change - confirm intended use.
+enum class Renderer { Software, OpenGL };
+
extern std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
extern EmuWindow* g_emu_window; ///< Emu window