diff options
author | Yuri Kunde Schlesner <yuriks@yuriks.net> | 2016-09-21 20:29:48 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-09-21 20:29:48 +0200 |
commit | d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a (patch) | |
tree | 8a22ca73ff838f3f0090b29a548ae81087fc90ed /src/video_core | |
parent | README: Specify master branch for Travis CI badge (diff) | |
parent | Fix Travis clang-format check (diff) | |
download | yuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.tar yuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.tar.gz yuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.tar.bz2 yuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.tar.lz yuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.tar.xz yuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.tar.zst yuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.zip |
Diffstat (limited to 'src/video_core')
42 files changed, 2927 insertions, 2610 deletions
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index db99ce666..05b5cea73 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp @@ -5,15 +5,12 @@ #include <algorithm> #include <array> #include <cstddef> - #include <boost/container/static_vector.hpp> #include <boost/container/vector.hpp> - #include "common/bit_field.h" #include "common/common_types.h" #include "common/logging/log.h" #include "common/vector_math.h" - #include "video_core/clipper.h" #include "video_core/pica.h" #include "video_core/pica_state.h" @@ -27,15 +24,10 @@ namespace Clipper { struct ClippingEdge { public: - ClippingEdge(Math::Vec4<float24> coeffs, - Math::Vec4<float24> bias = Math::Vec4<float24>(float24::FromFloat32(0), - float24::FromFloat32(0), - float24::FromFloat32(0), - float24::FromFloat32(0))) - : coeffs(coeffs), - bias(bias) - { - } + ClippingEdge(Math::Vec4<float24> coeffs, Math::Vec4<float24> bias = Math::Vec4<float24>( + float24::FromFloat32(0), float24::FromFloat32(0), + float24::FromFloat32(0), float24::FromFloat32(0))) + : coeffs(coeffs), bias(bias) {} bool IsInside(const OutputVertex& vertex) const { return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0); @@ -59,8 +51,7 @@ private: Math::Vec4<float24> bias; }; -static void InitScreenCoordinates(OutputVertex& vtx) -{ +static void InitScreenCoordinates(OutputVertex& vtx) { struct { float24 halfsize_x; float24 offset_x; @@ -73,8 +64,8 @@ static void InitScreenCoordinates(OutputVertex& vtx) const auto& regs = g_state.regs; viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x); viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); - viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); - viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); + viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); + viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; vtx.color *= inv_w; @@ -85,12 +76,14 @@ static void InitScreenCoordinates(OutputVertex& vtx) vtx.tc2 *= inv_w; vtx.pos.w = inv_w; - vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; - vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; + vtx.screenpos[0] = + (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; + vtx.screenpos[1] = + (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; vtx.screenpos[2] = vtx.pos.z * inv_w; } -void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) { +void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) { using boost::container::static_vector; // Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at @@ -98,10 +91,10 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9. static const size_t MAX_VERTICES = 9; - static_vector<OutputVertex, MAX_VERTICES> buffer_a = { v0, v1, v2 }; + static_vector<OutputVertex, MAX_VERTICES> buffer_a = {v0, v1, v2}; static_vector<OutputVertex, MAX_VERTICES> buffer_b; auto* output_list = &buffer_a; - auto* input_list = &buffer_b; + auto* input_list = &buffer_b; // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value. // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest @@ -110,13 +103,13 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu static const float24 f0 = float24::FromFloat32(0.0); static const float24 f1 = float24::FromFloat32(1.0); static const std::array<ClippingEdge, 7> clipping_edges = {{ - { Math::MakeVec( f1, f0, f0, -f1) }, // x = +w - { Math::MakeVec(-f1, f0, f0, -f1) }, // x = -w - { Math::MakeVec( f0, f1, f0, -f1) }, // y = +w - { Math::MakeVec( f0, -f1, f0, -f1) }, // y = -w - { Math::MakeVec( f0, f0, f1, f0) }, // z = 0 - { Math::MakeVec( f0, f0, -f1, -f1) }, // z = -w - { Math::MakeVec( f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON) }, // w = EPSILON + {Math::MakeVec(f1, f0, f0, -f1)}, // x = +w + {Math::MakeVec(-f1, f0, f0, -f1)}, // x = -w + {Math::MakeVec(f0, f1, f0, -f1)}, // y = +w + {Math::MakeVec(f0, -f1, f0, -f1)}, // y = -w + {Math::MakeVec(f0, f0, f1, f0)}, // z = 0 + {Math::MakeVec(f0, f0, -f1, -f1)}, // z = -w + {Math::MakeVec(f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON)}, // w = EPSILON }}; // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii) @@ -154,10 +147,10 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu InitScreenCoordinates((*output_list)[0]); InitScreenCoordinates((*output_list)[1]); - for (size_t i = 0; i < output_list->size() - 2; i ++) { + for (size_t i = 0; i < output_list->size() - 2; i++) { OutputVertex& vtx0 = (*output_list)[0]; - OutputVertex& vtx1 = (*output_list)[i+1]; - OutputVertex& vtx2 = (*output_list)[i+2]; + OutputVertex& vtx1 = (*output_list)[i + 1]; + OutputVertex& vtx2 = (*output_list)[i + 2]; InitScreenCoordinates(vtx2); @@ -165,19 +158,20 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), " "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", - i + 1, output_list->size() - 2, - vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), - vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), - vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), - vtx0.screenpos.x.ToFloat32(), vtx0.screenpos.y.ToFloat32(), vtx0.screenpos.z.ToFloat32(), - vtx1.screenpos.x.ToFloat32(), vtx1.screenpos.y.ToFloat32(), vtx1.screenpos.z.ToFloat32(), - vtx2.screenpos.x.ToFloat32(), vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32()); + i + 1, output_list->size() - 2, vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), + vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), vtx1.pos.x.ToFloat32(), + vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), + vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), + vtx2.pos.w.ToFloat32(), vtx0.screenpos.x.ToFloat32(), + vtx0.screenpos.y.ToFloat32(), vtx0.screenpos.z.ToFloat32(), + vtx1.screenpos.x.ToFloat32(), vtx1.screenpos.y.ToFloat32(), + vtx1.screenpos.z.ToFloat32(), vtx2.screenpos.x.ToFloat32(), + vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32()); Rasterizer::ProcessTriangle(vtx0, vtx1, vtx2); } } - } // namespace } // namespace diff --git a/src/video_core/clipper.h b/src/video_core/clipper.h index f85d8d4c9..b51af0af9 100644 --- a/src/video_core/clipper.h +++ b/src/video_core/clipper.h @@ -7,7 +7,7 @@ namespace Pica { namespace Shader { - struct OutputVertex; +struct OutputVertex; } namespace Clipper { diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 689859049..bb618cacd 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -6,17 +6,14 @@ #include <cstddef> #include <memory> #include <utility> - #include "common/assert.h" #include "common/logging/log.h" #include "common/microprofile.h" #include "common/vector_math.h" - #include "core/hle/service/gsp_gpu.h" #include "core/hw/gpu.h" #include "core/memory.h" #include "core/tracer/recorder.h" - #include "video_core/command_processor.h" #include "video_core/debug_utils/debug_utils.h" #include "video_core/pica.h" @@ -43,10 +40,8 @@ static u32 default_attr_write_buffer[3]; // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF static const u32 expand_bits_to_bytes[] = { - 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, - 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, - 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, - 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff + 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, + 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff, }; MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240)); @@ -68,383 +63,393 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { regs[id] = (old_value & ~write_mask) | (value & write_mask); - DebugUtils::OnPicaRegWrite({ (u16)id, (u16)mask, regs[id] }); + DebugUtils::OnPicaRegWrite({(u16)id, (u16)mask, regs[id]}); if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded, reinterpret_cast<void*>(&id)); - - switch(id) { - // Trigger IRQ - case PICA_REG_INDEX(trigger_irq): - GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); - break; - - case PICA_REG_INDEX_WORKAROUND(triangle_topology, 0x25E): - g_state.primitive_assembler.Reconfigure(regs.triangle_topology); - break; - - case PICA_REG_INDEX_WORKAROUND(restart_primitive, 0x25F): - g_state.primitive_assembler.Reset(); - break; - - case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.index, 0x232): - g_state.immediate.current_attribute = 0; + g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded, + reinterpret_cast<void*>(&id)); + + switch (id) { + // Trigger IRQ + case PICA_REG_INDEX(trigger_irq): + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); + break; + + case PICA_REG_INDEX_WORKAROUND(triangle_topology, 0x25E): + g_state.primitive_assembler.Reconfigure(regs.triangle_topology); + break; + + case PICA_REG_INDEX_WORKAROUND(restart_primitive, 0x25F): + g_state.primitive_assembler.Reset(); + break; + + case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.index, 0x232): + g_state.immediate.current_attribute = 0; + default_attr_counter = 0; + break; + + // Load default vertex input attributes + case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233): + case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234): + case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235): { + // TODO: Does actual hardware indeed keep an intermediate buffer or does + // it directly write the values? + default_attr_write_buffer[default_attr_counter++] = value; + + // Default attributes are written in a packed format such that four float24 values are + // encoded in + // three 32-bit numbers. We write to internal memory once a full such vector is + // written. + if (default_attr_counter >= 3) { default_attr_counter = 0; - break; - - // Load default vertex input attributes - case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233): - case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234): - case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235): - { - // TODO: Does actual hardware indeed keep an intermediate buffer or does - // it directly write the values? - default_attr_write_buffer[default_attr_counter++] = value; - - // Default attributes are written in a packed format such that four float24 values are encoded in - // three 32-bit numbers. We write to internal memory once a full such vector is - // written. - if (default_attr_counter >= 3) { - default_attr_counter = 0; - - auto& setup = regs.vs_default_attributes_setup; - - if (setup.index >= 16) { - LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); - break; - } - Math::Vec4<float24> attribute; + auto& setup = regs.vs_default_attributes_setup; - // NOTE: The destination component order indeed is "backwards" - attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); - attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); - attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); - attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF); + if (setup.index >= 16) { + LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); + break; + } - LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, - attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), - attribute.w.ToFloat32()); + Math::Vec4<float24> attribute; - // TODO: Verify that this actually modifies the register! - if (setup.index < 15) { - g_state.vs_default_attributes[setup.index] = attribute; - setup.index++; - } else { - // Put each attribute into an immediate input buffer. - // When all specified immediate attributes are present, the Vertex Shader is invoked and everything is - // sent to the primitive assembler. + // NOTE: The destination component order indeed is "backwards" + attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); + attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | + ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); + attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | + ((default_attr_write_buffer[2] >> 24) & 0xFF)); + attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF); - auto& immediate_input = g_state.immediate.input_vertex; - auto& immediate_attribute_id = g_state.immediate.current_attribute; + LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, + attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), + attribute.w.ToFloat32()); - immediate_input.attr[immediate_attribute_id++] = attribute; + // TODO: Verify that this actually modifies the register! + if (setup.index < 15) { + g_state.vs_default_attributes[setup.index] = attribute; + setup.index++; + } else { + // Put each attribute into an immediate input buffer. + // When all specified immediate attributes are present, the Vertex Shader is invoked + // and everything is + // sent to the primitive assembler. - if (immediate_attribute_id >= regs.vs.num_input_attributes+1) { - immediate_attribute_id = 0; + auto& immediate_input = g_state.immediate.input_vertex; + auto& immediate_attribute_id = g_state.immediate.current_attribute; - Shader::UnitState<false> shader_unit; - g_state.vs.Setup(); + immediate_input.attr[immediate_attribute_id++] = attribute; - // Send to vertex shader - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input)); - g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); - Shader::OutputVertex output_vertex = shader_unit.output_registers.ToVertex(regs.vs); + if (immediate_attribute_id >= regs.vs.num_input_attributes + 1) { + immediate_attribute_id = 0; - // Send to renderer - using Pica::Shader::OutputVertex; - auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) { - VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); - }; + Shader::UnitState<false> shader_unit; + g_state.vs.Setup(); - g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle); - } + // Send to vertex shader + if (g_debug_context) + g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, + static_cast<void*>(&immediate_input)); + g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes + 1); + Shader::OutputVertex output_vertex = + shader_unit.output_registers.ToVertex(regs.vs); + + // Send to renderer + using Pica::Shader::OutputVertex; + auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, + const OutputVertex& v2) { + VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); + }; + + g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle); } } - break; } + break; + } - case PICA_REG_INDEX(gpu_mode): - if (regs.gpu_mode == Regs::GPUMode::Configuring) { - // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring - VideoCore::g_renderer->Rasterizer()->DrawTriangles(); + case PICA_REG_INDEX(gpu_mode): + if (regs.gpu_mode == Regs::GPUMode::Configuring) { + // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring + VideoCore::g_renderer->Rasterizer()->DrawTriangles(); - if (g_debug_context) { - g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); - } + if (g_debug_context) { + g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); } - break; - - case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c): - case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): - { - unsigned index = static_cast<unsigned>(id - PICA_REG_INDEX(command_buffer.trigger[0])); - u32* head_ptr = (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index)); - g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr; - g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32); - break; } + break; + + case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c): + case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): { + unsigned index = static_cast<unsigned>(id - PICA_REG_INDEX(command_buffer.trigger[0])); + u32* head_ptr = + (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index)); + g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr; + g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32); + break; + } - // It seems like these trigger vertex rendering - case PICA_REG_INDEX(trigger_draw): - case PICA_REG_INDEX(trigger_draw_indexed): - { - MICROPROFILE_SCOPE(GPU_Drawing); + // It seems like these trigger vertex rendering + case PICA_REG_INDEX(trigger_draw): + case PICA_REG_INDEX(trigger_draw_indexed): { + MICROPROFILE_SCOPE(GPU_Drawing); #if PICA_LOG_TEV - DebugUtils::DumpTevStageConfig(regs.GetTevStages()); + DebugUtils::DumpTevStageConfig(regs.GetTevStages()); #endif - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); - - // Processes information about internal vertex attributes to figure out how a vertex is loaded. - // Later, these can be compiled and cached. - const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress(); - VertexLoader loader(regs); - - // Load vertices - bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); - - const auto& index_info = regs.index_array; - const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); - const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); - bool index_u16 = index_info.format != 0; - - PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler; - - if (g_debug_context) { - for (int i = 0; i < 3; ++i) { - const auto texture = regs.GetTextures()[i]; - if (!texture.enabled) - continue; - - u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); - if (g_debug_context && Pica::g_debug_context->recorder) - g_debug_context->recorder->MemoryAccessed(texture_data, Pica::Regs::NibblesPerPixel(texture.format) * texture.config.width / 2 * texture.config.height, texture.config.GetPhysicalAddress()); - } + if (g_debug_context) + g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); + + // Processes information about internal vertex attributes to figure out how a vertex is + // loaded. + // Later, these can be compiled and cached. + const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress(); + VertexLoader loader(regs); + + // Load vertices + bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); + + const auto& index_info = regs.index_array; + const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); + const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); + bool index_u16 = index_info.format != 0; + + PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler; + + if (g_debug_context) { + for (int i = 0; i < 3; ++i) { + const auto texture = regs.GetTextures()[i]; + if (!texture.enabled) + continue; + + u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); + if (g_debug_context && Pica::g_debug_context->recorder) + g_debug_context->recorder->MemoryAccessed( + texture_data, Pica::Regs::NibblesPerPixel(texture.format) * + texture.config.width / 2 * texture.config.height, + texture.config.GetPhysicalAddress()); } + } - DebugUtils::MemoryAccessTracker memory_accesses; - - // Simple circular-replacement vertex cache - // The size has been tuned for optimal balance between hit-rate and the cost of lookup - const size_t VERTEX_CACHE_SIZE = 32; - std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; - std::array<Shader::OutputRegisters, VERTEX_CACHE_SIZE> vertex_cache; + DebugUtils::MemoryAccessTracker memory_accesses; - unsigned int vertex_cache_pos = 0; - vertex_cache_ids.fill(-1); + // Simple circular-replacement vertex cache + // The size has been tuned for optimal balance between hit-rate and the cost of lookup + const size_t VERTEX_CACHE_SIZE = 32; + std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; + std::array<Shader::OutputRegisters, VERTEX_CACHE_SIZE> vertex_cache; - Shader::UnitState<false> shader_unit; - g_state.vs.Setup(); + unsigned int vertex_cache_pos = 0; + vertex_cache_ids.fill(-1); - for (unsigned int index = 0; index < regs.num_vertices; ++index) - { - // Indexed rendering doesn't use the start offset - unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : (index + regs.vertex_offset); + Shader::UnitState<false> shader_unit; + g_state.vs.Setup(); - // -1 is a common special value used for primitive restart. Since it's unknown if - // the PICA supports it, and it would mess up the caching, guard against it here. - ASSERT(vertex != -1); + for (unsigned int index = 0; index < regs.num_vertices; ++index) { + // Indexed rendering doesn't use the start offset + unsigned int vertex = + is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) + : (index + regs.vertex_offset); - bool vertex_cache_hit = false; - Shader::OutputRegisters output_registers; + // -1 is a common special value used for primitive restart. Since it's unknown if + // the PICA supports it, and it would mess up the caching, guard against it here. + ASSERT(vertex != -1); - if (is_indexed) { - if (g_debug_context && Pica::g_debug_context->recorder) { - int size = index_u16 ? 2 : 1; - memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); - } + bool vertex_cache_hit = false; + Shader::OutputRegisters output_registers; - for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { - if (vertex == vertex_cache_ids[i]) { - output_registers = vertex_cache[i]; - vertex_cache_hit = true; - break; - } - } + if (is_indexed) { + if (g_debug_context && Pica::g_debug_context->recorder) { + int size = index_u16 ? 2 : 1; + memory_accesses.AddAccess(base_address + index_info.offset + size * index, + size); } - if (!vertex_cache_hit) { - // Initialize data for the current vertex - Shader::InputVertex input; - loader.LoadVertex(base_address, index, vertex, input, memory_accesses); - - // Send to vertex shader - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); - g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes()); - output_registers = shader_unit.output_registers; - - if (is_indexed) { - vertex_cache[vertex_cache_pos] = output_registers; - vertex_cache_ids[vertex_cache_pos] = vertex; - vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; + for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { + if (vertex == vertex_cache_ids[i]) { + output_registers = vertex_cache[i]; + vertex_cache_hit = true; + break; } } + } - // Retreive vertex from register data - Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs); + if (!vertex_cache_hit) { + // Initialize data for the current vertex + Shader::InputVertex input; + loader.LoadVertex(base_address, index, vertex, input, memory_accesses); - // Send to renderer - using Pica::Shader::OutputVertex; - auto AddTriangle = []( - const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) { - VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); - }; + // Send to vertex shader + if (g_debug_context) + g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, + (void*)&input); + g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes()); + output_registers = shader_unit.output_registers; - primitive_assembler.SubmitVertex(output_vertex, AddTriangle); + if (is_indexed) { + vertex_cache[vertex_cache_pos] = output_registers; + vertex_cache_ids[vertex_cache_pos] = vertex; + vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; + } } - for (auto& range : memory_accesses.ranges) { - g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first), - range.second, range.first); - } + // Retreive vertex from register data + Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs); + + // Send to renderer + using Pica::Shader::OutputVertex; + auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, + const OutputVertex& v2) { + VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); + }; - break; + primitive_assembler.SubmitVertex(output_vertex, AddTriangle); } - case PICA_REG_INDEX(vs.bool_uniforms): - for (unsigned i = 0; i < 16; ++i) - g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0; - - break; - - case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1): - case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): - case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): - case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): - { - int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); - auto values = regs.vs.int_uniforms[index]; - g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w); - LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", - index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value()); - break; + for (auto& range : memory_accesses.ranges) { + g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first), + range.second, range.first); } - case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1): - case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2): - case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3): - case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[3], 0x2c4): - case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[4], 0x2c5): - case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): - case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): - case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): - { - auto& uniform_setup = regs.vs.uniform_setup; - - // TODO: Does actual hardware indeed keep an intermediate buffer or does - // it directly write the values? - uniform_write_buffer[float_regs_counter++] = value; - - // Uniforms are written in a packed format such that four float24 values are encoded in - // three 32-bit numbers. We write to internal memory once a full such vector is - // written. - if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) || - (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { - float_regs_counter = 0; - - auto& uniform = g_state.vs.uniforms.f[uniform_setup.index]; - - if (uniform_setup.index > 95) { - LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index); - break; - } + break; + } - // NOTE: The destination component order indeed is "backwards" - if (uniform_setup.IsFloat32()) { - for (auto i : {0,1,2,3}) - uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); - } else { - // TODO: Untested - uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); - uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); - uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); - uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); - } + case PICA_REG_INDEX(vs.bool_uniforms): + for (unsigned i = 0; i < 16; ++i) + g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0; + + break; + + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1): + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): { + int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); + auto values = regs.vs.int_uniforms[index]; + g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w); + LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", index, values.x.Value(), + values.y.Value(), values.z.Value(), values.w.Value()); + break; + } - LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, - uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), - uniform.w.ToFloat32()); + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[3], 0x2c4): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[4], 0x2c5): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): { + auto& uniform_setup = regs.vs.uniform_setup; + + // TODO: Does actual hardware indeed keep an intermediate buffer or does + // it directly write the values? + uniform_write_buffer[float_regs_counter++] = value; + + // Uniforms are written in a packed format such that four float24 values are encoded in + // three 32-bit numbers. We write to internal memory once a full such vector is + // written. + if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) || + (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { + float_regs_counter = 0; + + auto& uniform = g_state.vs.uniforms.f[uniform_setup.index]; + + if (uniform_setup.index > 95) { + LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index); + break; + } - // TODO: Verify that this actually modifies the register! - uniform_setup.index.Assign(uniform_setup.index + 1); + // NOTE: The destination component order indeed is "backwards" + if (uniform_setup.IsFloat32()) { + for (auto i : {0, 1, 2, 3}) + uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); + } else { + // TODO: Untested + uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); + uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | + ((uniform_write_buffer[1] >> 16) & 0xFFFF)); + uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | + ((uniform_write_buffer[2] >> 24) & 0xFF)); + uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); } - break; - } - // Load shader program code - case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc): - case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd): - case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce): - case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[3], 0x2cf): - case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[4], 0x2d0): - case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): - case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): - case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): - { - g_state.vs.program_code[regs.vs.program.offset] = value; - regs.vs.program.offset++; - break; - } + LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, + uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), + uniform.w.ToFloat32()); - // Load swizzle pattern data - case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6): - case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7): - case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8): - case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[3], 0x2d9): - case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[4], 0x2da): - case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): - case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): - case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): - { - g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value; - regs.vs.swizzle_patterns.offset++; - break; + // TODO: Verify that this actually modifies the register! + uniform_setup.index.Assign(uniform_setup.index + 1); } + break; + } - case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): - case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9): - case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca): - case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb): - case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): - case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): - case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): - case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): - { - auto& lut_config = regs.lighting.lut_config; - - ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!"); - - g_state.lighting.luts[lut_config.type][lut_config.index].raw = value; - lut_config.index.Assign(lut_config.index + 1); - break; - } + // Load shader program code + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[3], 0x2cf): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[4], 0x2d0): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): { + g_state.vs.program_code[regs.vs.program.offset] = value; + regs.vs.program.offset++; + break; + } - case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8): - case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9): - case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea): - case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb): - case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec): - case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed): - case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee): - case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef): - { - g_state.fog.lut[regs.fog_lut_offset % 128].raw = value; - regs.fog_lut_offset.Assign(regs.fog_lut_offset + 1); - break; - } + // Load swizzle pattern data + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[3], 0x2d9): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[4], 0x2da): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): { + g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value; + regs.vs.swizzle_patterns.offset++; + break; + } + + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): { + auto& lut_config = regs.lighting.lut_config; + + ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!"); + + g_state.lighting.luts[lut_config.type][lut_config.index].raw = value; + lut_config.index.Assign(lut_config.index + 1); + break; + } - default: - break; + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef): { + g_state.fog.lut[regs.fog_lut_offset % 128].raw = value; + regs.fog_lut_offset.Assign(regs.fog_lut_offset + 1); + break; + } + + default: + break; } VideoCore::g_renderer->Rasterizer()->NotifyPicaRegisterChanged(id); if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::PicaCommandProcessed, reinterpret_cast<void*>(&id)); + g_debug_context->OnEvent(DebugContext::Event::PicaCommandProcessed, + reinterpret_cast<void*>(&id)); } void ProcessCommandList(const u32* list, u32 size) { @@ -458,14 +463,14 @@ void ProcessCommandList(const u32* list, u32 size) { ++g_state.cmd_list.current_ptr; u32 value = *g_state.cmd_list.current_ptr++; - const CommandHeader header = { *g_state.cmd_list.current_ptr++ }; + const CommandHeader header = {*g_state.cmd_list.current_ptr++}; WritePicaReg(header.cmd_id, value, header.parameter_mask); for (unsigned i = 0; i < header.extra_data_length; ++i) { u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0); WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, header.parameter_mask); - } + } } } diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h index 022a71f5e..62ad2d3f3 100644 --- a/src/video_core/command_processor.h +++ b/src/video_core/command_processor.h @@ -5,7 +5,6 @@ #pragma once #include <type_traits> - #include "common/bit_field.h" #include "common/common_types.h" @@ -16,7 +15,7 @@ namespace CommandProcessor { union CommandHeader { u32 hex; - BitField< 0, 16, u32> cmd_id; + BitField<0, 16, u32> cmd_id; // parameter_mask: // Mask applied to the input value to make it possible to update @@ -25,11 +24,11 @@ union CommandHeader { // second bit: 0x0000FF00 // third bit: 0x00FF0000 // fourth bit: 0xFF000000 - BitField<16, 4, u32> parameter_mask; + BitField<16, 4, u32> parameter_mask; BitField<20, 11, u32> extra_data_length; - BitField<31, 1, u32> group_commands; + BitField<31, 1, u32> group_commands; }; static_assert(std::is_standard_layout<CommandHeader>::value == true, "CommandHeader does not use standard layout"); diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index bfa686380..8806464d9 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -20,7 +20,6 @@ #include <nihstro/bit_field.h> #include <nihstro/float24.h> #include <nihstro/shader_binary.h> - #include "common/assert.h" #include "common/bit_field.h" #include "common/color.h" @@ -29,7 +28,6 @@ #include "common/logging/log.h" #include "common/math_util.h" #include "common/vector_math.h" - #include "video_core/debug_utils/debug_utils.h" #include "video_core/pica.h" #include "video_core/pica_state.h" @@ -50,7 +48,8 @@ void DebugContext::DoOnEvent(Event event, void* data) { { std::unique_lock<std::mutex> lock(breakpoint_mutex); - // Commit the rasterizer's caches so framebuffers, render targets, etc. will show on debug widgets + // Commit the rasterizer's caches so framebuffers, render targets, etc. will show on debug + // widgets VideoCore::g_renderer->Rasterizer()->FlushAll(); // TODO: Should stop the CPU thread here once we multithread emulation. @@ -64,7 +63,7 @@ void DebugContext::DoOnEvent(Event event, void* data) { } // Wait until another thread tells us to Resume() - resume_from_breakpoint.wait(lock, [&]{ return !at_breakpoint; }); + resume_from_breakpoint.wait(lock, [&] { return !at_breakpoint; }); } } @@ -88,8 +87,9 @@ std::shared_ptr<DebugContext> g_debug_context; // TODO: Get rid of this global namespace DebugUtils { -void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes) -{ +void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, + const Shader::ShaderSetup& setup, + const Regs::VSOutputAttributes* output_attributes) { struct StuffToWrite { const u8* pointer; u32 size; @@ -97,7 +97,7 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c std::vector<StuffToWrite> writing_queue; u32 write_offset = 0; - auto QueueForWriting = [&writing_queue,&write_offset](const u8* pointer, u32 size) { + auto QueueForWriting = [&writing_queue, &write_offset](const u8* pointer, u32 size) { writing_queue.push_back({pointer, size}); u32 old_write_offset = write_offset; write_offset += size; @@ -108,99 +108,95 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c // into shbin format (separate type and component mask). union OutputRegisterInfo { enum Type : u64 { - POSITION = 0, + POSITION = 0, QUATERNION = 1, - COLOR = 2, - TEXCOORD0 = 3, - TEXCOORD1 = 5, - TEXCOORD2 = 6, + COLOR = 2, + TEXCOORD0 = 3, + TEXCOORD1 = 5, + TEXCOORD2 = 6, - VIEW = 8, + VIEW = 8, }; - BitField< 0, 64, u64> hex; + BitField<0, 64, u64> hex; - BitField< 0, 16, Type> type; + BitField<0, 16, Type> type; BitField<16, 16, u64> id; - BitField<32, 4, u64> component_mask; + BitField<32, 4, u64> component_mask; }; // This is put into a try-catch block to make sure we notice unknown configurations. std::vector<OutputRegisterInfo> output_info_table; - for (unsigned i = 0; i < 7; ++i) { - using OutputAttributes = Pica::Regs::VSOutputAttributes; - - // TODO: It's still unclear how the attribute components map to the register! - // Once we know that, this code probably will not make much sense anymore. - std::map<OutputAttributes::Semantic, std::pair<OutputRegisterInfo::Type, u32> > map = { - { OutputAttributes::POSITION_X, { OutputRegisterInfo::POSITION, 1} }, - { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} }, - { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} }, - { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} }, - { OutputAttributes::QUATERNION_X, { OutputRegisterInfo::QUATERNION, 1} }, - { OutputAttributes::QUATERNION_Y, { OutputRegisterInfo::QUATERNION, 2} }, - { OutputAttributes::QUATERNION_Z, { OutputRegisterInfo::QUATERNION, 4} }, - { OutputAttributes::QUATERNION_W, { OutputRegisterInfo::QUATERNION, 8} }, - { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} }, - { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} }, - { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} }, - { OutputAttributes::COLOR_A, { OutputRegisterInfo::COLOR, 8} }, - { OutputAttributes::TEXCOORD0_U, { OutputRegisterInfo::TEXCOORD0, 1} }, - { OutputAttributes::TEXCOORD0_V, { OutputRegisterInfo::TEXCOORD0, 2} }, - { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} }, - { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} }, - { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} }, - { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} }, - { OutputAttributes::VIEW_X, { OutputRegisterInfo::VIEW, 1} }, - { OutputAttributes::VIEW_Y, { OutputRegisterInfo::VIEW, 2} }, - { OutputAttributes::VIEW_Z, { OutputRegisterInfo::VIEW, 4} } - }; - - for (const auto& semantic : std::vector<OutputAttributes::Semantic>{ - output_attributes[i].map_x, - output_attributes[i].map_y, - output_attributes[i].map_z, - output_attributes[i].map_w }) { - if (semantic == OutputAttributes::INVALID) - continue; - - try { - OutputRegisterInfo::Type type = map.at(semantic).first; - u32 component_mask = map.at(semantic).second; - - auto it = std::find_if(output_info_table.begin(), output_info_table.end(), - [&i, &type](const OutputRegisterInfo& info) { - return info.id == i && info.type == type; - } - ); - - if (it == output_info_table.end()) { - output_info_table.emplace_back(); - output_info_table.back().type.Assign(type); - output_info_table.back().component_mask.Assign(component_mask); - output_info_table.back().id.Assign(i); - } else { - it->component_mask.Assign(it->component_mask | component_mask); - } - } catch (const std::out_of_range& ) { - DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping"); - LOG_ERROR(HW_GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x", - (int)output_attributes[i].map_x.Value(), - (int)output_attributes[i].map_y.Value(), - (int)output_attributes[i].map_z.Value(), - (int)output_attributes[i].map_w.Value()); + for (unsigned i = 0; i < 7; ++i) { + using OutputAttributes = Pica::Regs::VSOutputAttributes; + + // TODO: It's still unclear how the attribute components map to the register! + // Once we know that, this code probably will not make much sense anymore. + std::map<OutputAttributes::Semantic, std::pair<OutputRegisterInfo::Type, u32>> map = { + {OutputAttributes::POSITION_X, {OutputRegisterInfo::POSITION, 1}}, + {OutputAttributes::POSITION_Y, {OutputRegisterInfo::POSITION, 2}}, + {OutputAttributes::POSITION_Z, {OutputRegisterInfo::POSITION, 4}}, + {OutputAttributes::POSITION_W, {OutputRegisterInfo::POSITION, 8}}, + {OutputAttributes::QUATERNION_X, {OutputRegisterInfo::QUATERNION, 1}}, + {OutputAttributes::QUATERNION_Y, {OutputRegisterInfo::QUATERNION, 2}}, + {OutputAttributes::QUATERNION_Z, {OutputRegisterInfo::QUATERNION, 4}}, + {OutputAttributes::QUATERNION_W, {OutputRegisterInfo::QUATERNION, 8}}, + {OutputAttributes::COLOR_R, {OutputRegisterInfo::COLOR, 1}}, + {OutputAttributes::COLOR_G, {OutputRegisterInfo::COLOR, 2}}, + {OutputAttributes::COLOR_B, {OutputRegisterInfo::COLOR, 4}}, + {OutputAttributes::COLOR_A, {OutputRegisterInfo::COLOR, 8}}, + {OutputAttributes::TEXCOORD0_U, {OutputRegisterInfo::TEXCOORD0, 1}}, + {OutputAttributes::TEXCOORD0_V, {OutputRegisterInfo::TEXCOORD0, 2}}, + {OutputAttributes::TEXCOORD1_U, {OutputRegisterInfo::TEXCOORD1, 1}}, + {OutputAttributes::TEXCOORD1_V, {OutputRegisterInfo::TEXCOORD1, 2}}, + {OutputAttributes::TEXCOORD2_U, {OutputRegisterInfo::TEXCOORD2, 1}}, + {OutputAttributes::TEXCOORD2_V, {OutputRegisterInfo::TEXCOORD2, 2}}, + {OutputAttributes::VIEW_X, {OutputRegisterInfo::VIEW, 1}}, + {OutputAttributes::VIEW_Y, {OutputRegisterInfo::VIEW, 2}}, + {OutputAttributes::VIEW_Z, {OutputRegisterInfo::VIEW, 4}}, + }; + + for (const auto& semantic : std::vector<OutputAttributes::Semantic>{ + output_attributes[i].map_x, output_attributes[i].map_y, output_attributes[i].map_z, + output_attributes[i].map_w}) { + if (semantic == OutputAttributes::INVALID) + continue; + + try { + OutputRegisterInfo::Type type = map.at(semantic).first; + u32 component_mask = map.at(semantic).second; + + auto it = std::find_if(output_info_table.begin(), output_info_table.end(), + [&i, &type](const OutputRegisterInfo& info) { + return info.id == i && info.type == type; + }); + + if (it == output_info_table.end()) { + output_info_table.emplace_back(); + output_info_table.back().type.Assign(type); + output_info_table.back().component_mask.Assign(component_mask); + output_info_table.back().id.Assign(i); + } else { + it->component_mask.Assign(it->component_mask | component_mask); } + } catch (const std::out_of_range&) { + DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping"); + LOG_ERROR(HW_GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x", + (int)output_attributes[i].map_x.Value(), + (int)output_attributes[i].map_y.Value(), + (int)output_attributes[i].map_z.Value(), + (int)output_attributes[i].map_w.Value()); } } - + } struct { DVLBHeader header; u32 dvle_offset; - } dvlb{ {DVLBHeader::MAGIC_WORD, 1 } }; // 1 DVLE + } dvlb{{DVLBHeader::MAGIC_WORD, 1}}; // 1 DVLE - DVLPHeader dvlp{ DVLPHeader::MAGIC_WORD }; - DVLEHeader dvle{ DVLEHeader::MAGIC_WORD }; + DVLPHeader dvlp{DVLPHeader::MAGIC_WORD}; + DVLEHeader dvle{DVLEHeader::MAGIC_WORD}; QueueForWriting(reinterpret_cast<const u8*>(&dvlb), sizeof(dvlb)); u32 dvlp_offset = QueueForWriting(reinterpret_cast<const u8*>(&dvlp), sizeof(dvlp)); @@ -216,14 +212,16 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c dvlp.swizzle_info_num_entries = static_cast<uint32_t>(setup.swizzle_data.size()); u32 dummy = 0; for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) { - QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]), sizeof(setup.swizzle_data[i])); + QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]), + sizeof(setup.swizzle_data[i])); QueueForWriting(reinterpret_cast<const u8*>(&dummy), sizeof(dummy)); } dvle.main_offset_words = config.main_offset; dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; dvle.output_register_table_size = static_cast<u32>(output_info_table.size()); - QueueForWriting(reinterpret_cast<const u8*>(output_info_table.data()), static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo))); + QueueForWriting(reinterpret_cast<const u8*>(output_info_table.data()), + static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo))); // TODO: Create a label table for "main" @@ -258,10 +256,8 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c constant.f.w = nihstro::to_float24(setup.uniforms.f[i].w.ToFloat32()); // Store constant if it's different from zero.. - if (setup.uniforms.f[i].x.ToFloat32() != 0.0 || - setup.uniforms.f[i].y.ToFloat32() != 0.0 || - setup.uniforms.f[i].z.ToFloat32() != 0.0 || - setup.uniforms.f[i].w.ToFloat32() != 0.0) + if (setup.uniforms.f[i].x.ToFloat32() != 0.0 || setup.uniforms.f[i].y.ToFloat32() != 0.0 || + setup.uniforms.f[i].z.ToFloat32() != 0.0 || setup.uniforms.f[i].w.ToFloat32() != 0.0) constant_table.emplace_back(constant); } dvle.constant_table_offset = write_offset - dvlb.dvle_offset; @@ -282,8 +278,7 @@ static std::unique_ptr<PicaTrace> pica_trace; static std::mutex pica_trace_mutex; static int is_pica_tracing = false; -void StartPicaTracing() -{ +void StartPicaTracing() { if (is_pica_tracing) { LOG_WARNING(HW_GPU, "StartPicaTracing called even though tracing already running!"); return; @@ -295,13 +290,11 @@ void StartPicaTracing() is_pica_tracing = true; } -bool IsPicaTracing() -{ +bool IsPicaTracing() { return is_pica_tracing != 0; } -void OnPicaRegWrite(PicaTrace::Write write) -{ +void OnPicaRegWrite(PicaTrace::Write write) { // Double check for is_pica_tracing to avoid pointless locking overhead if (!is_pica_tracing) return; @@ -314,8 +307,7 @@ void OnPicaRegWrite(PicaTrace::Write write) pica_trace->writes.push_back(write); } -std::unique_ptr<PicaTrace> FinishPicaTracing() -{ +std::unique_ptr<PicaTrace> FinishPicaTracing() { if (!is_pica_tracing) { LOG_WARNING(HW_GPU, "FinishPicaTracing called even though tracing isn't running!"); return {}; @@ -331,12 +323,12 @@ std::unique_ptr<PicaTrace> FinishPicaTracing() return ret; } -const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) { +const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, + bool disable_alpha) { const unsigned int coarse_x = x & ~7; const unsigned int coarse_y = y & ~7; - if (info.format != Regs::TextureFormat::ETC1 && - info.format != Regs::TextureFormat::ETC1A4) { + if (info.format != Regs::TextureFormat::ETC1 && info.format != Regs::TextureFormat::ETC1A4) { // TODO(neobrain): Fix code design to unify vertical block offsets! source += coarse_y * info.stride; } @@ -344,73 +336,63 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture // TODO: Assert that width/height are multiples of block dimensions switch (info.format) { - case Regs::TextureFormat::RGBA8: - { + case Regs::TextureFormat::RGBA8: { auto res = Color::DecodeRGBA8(source + VideoCore::GetMortonOffset(x, y, 4)); - return { res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a()) }; + return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())}; } - case Regs::TextureFormat::RGB8: - { + case Regs::TextureFormat::RGB8: { auto res = Color::DecodeRGB8(source + VideoCore::GetMortonOffset(x, y, 3)); - return { res.r(), res.g(), res.b(), 255 }; + return {res.r(), res.g(), res.b(), 255}; } - case Regs::TextureFormat::RGB5A1: - { + case Regs::TextureFormat::RGB5A1: { auto res = Color::DecodeRGB5A1(source + VideoCore::GetMortonOffset(x, y, 2)); - return { res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a()) }; + return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())}; } - case Regs::TextureFormat::RGB565: - { + case Regs::TextureFormat::RGB565: { auto res = Color::DecodeRGB565(source + VideoCore::GetMortonOffset(x, y, 2)); - return { res.r(), res.g(), res.b(), 255 }; + return {res.r(), res.g(), res.b(), 255}; } - case Regs::TextureFormat::RGBA4: - { + case Regs::TextureFormat::RGBA4: { auto res = Color::DecodeRGBA4(source + VideoCore::GetMortonOffset(x, y, 2)); - return { res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a()) }; + return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())}; } - case Regs::TextureFormat::IA8: - { + case Regs::TextureFormat::IA8: { const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2); if (disable_alpha) { // Show intensity as red, alpha as green - return { source_ptr[1], source_ptr[0], 0, 255 }; + return {source_ptr[1], source_ptr[0], 0, 255}; } else { - return { source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0] }; + return {source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]}; } } - case Regs::TextureFormat::RG8: - { + case Regs::TextureFormat::RG8: { auto res = Color::DecodeRG8(source + VideoCore::GetMortonOffset(x, y, 2)); - return { res.r(), res.g(), 0, 255 }; + return {res.r(), res.g(), 0, 255}; } - case Regs::TextureFormat::I8: - { + case Regs::TextureFormat::I8: { const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); - return { *source_ptr, *source_ptr, *source_ptr, 255 }; + return {*source_ptr, *source_ptr, *source_ptr, 255}; } - case Regs::TextureFormat::A8: - { + case Regs::TextureFormat::A8: { const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); if (disable_alpha) { - return { *source_ptr, *source_ptr, *source_ptr, 255 }; + return {*source_ptr, *source_ptr, *source_ptr, 255}; } else { - return { 0, 0, 0, *source_ptr }; + return {0, 0, 0, *source_ptr}; } } - case Regs::TextureFormat::IA4: - { + case Regs::TextureFormat::IA4: { const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); @@ -418,25 +400,23 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture if (disable_alpha) { // Show intensity as red, alpha as green - return { i, a, 0, 255 }; + return {i, a, 0, 255}; } else { - return { i, i, i, a }; + return {i, i, i, a}; } } - case Regs::TextureFormat::I4: - { + case Regs::TextureFormat::I4: { u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); const u8* source_ptr = source + morton_offset / 2; u8 i = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF); i = Color::Convert4To8(i); - return { i, i, i, 255 }; + return {i, i, i, 255}; } - case Regs::TextureFormat::A4: - { + case Regs::TextureFormat::A4: { u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); const u8* source_ptr = source + morton_offset / 2; @@ -444,15 +424,14 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture a = Color::Convert4To8(a); if (disable_alpha) { - return { a, a, a, 255 }; + return {a, a, a, 255}; } else { - return { 0, 0, 0, a }; + return {0, 0, 0, a}; } } case Regs::TextureFormat::ETC1: - case Regs::TextureFormat::ETC1A4: - { + case Regs::TextureFormat::ETC1A4: { bool has_alpha = (info.format == Regs::TextureFormat::ETC1A4); // ETC1 further subdivides each 8x8 tile into four 4x4 subtiles @@ -462,10 +441,9 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture int subtile_index = ((x / subtile_width) & 1) + 2 * ((y / subtile_height) & 1); unsigned subtile_bytes = has_alpha ? 2 : 1; // TODO: Name... - const u64* source_ptr = (const u64*)(source - + coarse_x * subtile_bytes * 4 - + coarse_y * subtile_bytes * 4 * (info.width / 8) - + subtile_index * subtile_bytes * 8); + const u64* source_ptr = (const u64*)(source + coarse_x * subtile_bytes * 4 + + coarse_y * subtile_bytes * 4 * (info.width / 8) + + subtile_index * subtile_bytes * 8); u64 alpha = 0xFFFFFFFFFFFFFFFF; if (has_alpha) { alpha = *source_ptr; @@ -474,7 +452,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture union ETC1Tile { // Each of these two is a collection of 16 bits (one per lookup value) - BitField< 0, 16, u64> table_subindexes; + BitField<0, 16, u64> table_subindexes; BitField<16, 16, u64> negation_flags; unsigned GetTableSubIndex(unsigned index) const { @@ -547,11 +525,18 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture } // Add modifier - unsigned table_index = static_cast<int>((x < 2) ? table_index_1.Value() : table_index_2.Value()); + unsigned table_index = + static_cast<int>((x < 2) ? table_index_1.Value() : table_index_2.Value()); static const std::array<std::array<u8, 2>, 8> etc1_modifier_table = {{ - {{ 2, 8 }}, {{ 5, 17 }}, {{ 9, 29 }}, {{ 13, 42 }}, - {{ 18, 60 }}, {{ 24, 80 }}, {{ 33, 106 }}, {{ 47, 183 }} + {{2, 8}}, + {{5, 17}}, + {{9, 29}}, + {{13, 42}}, + {{18, 60}}, + {{24, 80}}, + {{33, 106}}, + {{47, 183}}, }}; int modifier = etc1_modifier_table.at(table_index).at(GetTableSubIndex(texel)); @@ -564,7 +549,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture return ret.Cast<u8>(); } - } const *etc1_tile = reinterpret_cast<const ETC1Tile*>(source_ptr); + } const* etc1_tile = reinterpret_cast<const ETC1Tile*>(source_ptr); alpha >>= 4 * ((x & 3) * 4 + (y & 3)); return Math::MakeVec(etc1_tile->GetRGB(x & 3, y & 3), @@ -579,8 +564,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture } TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, - const Regs::TextureFormat& format) -{ + const Regs::TextureFormat& format) { TextureInfo info; info.physical_address = config.GetPhysicalAddress(); info.width = config.width; @@ -595,13 +579,13 @@ TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, static void WriteIOFile(png_structp png_ptr, png_bytep data, png_size_t length) { auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr)); if (!fp->WriteBytes(data, length)) - png_error(png_ptr, "Failed to write to output PNG file."); + png_error(png_ptr, "Failed to write to output PNG file."); } static void FlushIOFile(png_structp png_ptr) { auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr)); if (!fp->Flush()) - png_error(png_ptr, "Failed to flush to output PNG file."); + png_error(png_ptr, "Failed to flush to output PNG file."); } #endif @@ -614,7 +598,8 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { // Write data to file static int dump_index = 0; - std::string filename = std::string("texture_dump") + std::to_string(++dump_index) + std::string(".png"); + std::string filename = + std::string("texture_dump") + std::to_string(++dump_index) + std::string(".png"); u32 row_stride = texture_config.width * 3; u8* buf; @@ -632,7 +617,6 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { if (png_ptr == nullptr) { LOG_ERROR(Debug_GPU, "Could not allocate write struct"); goto finalise; - } // Initialize info structure @@ -651,9 +635,9 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { png_set_write_fn(png_ptr, static_cast<void*>(&fp), WriteIOFile, FlushIOFile); // Write header (8 bit color depth) - png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, - 8, PNG_COLOR_TYPE_RGB /*_ALPHA*/, PNG_INTERLACE_NONE, - PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); + png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, 8, + PNG_COLOR_TYPE_RGB /*_ALPHA*/, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE, + PNG_FILTER_TYPE_BASE); png_text title_text; title_text.compression = PNG_TEXT_COMPRESSION_NONE; @@ -672,15 +656,14 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { info.stride = row_stride; info.format = g_state.regs.texture0_format; Math::Vec4<u8> texture_color = LookupTexture(data, x, y, info); - buf[3 * x + y * row_stride ] = texture_color.r(); + buf[3 * x + y * row_stride] = texture_color.r(); buf[3 * x + y * row_stride + 1] = texture_color.g(); buf[3 * x + y * row_stride + 2] = texture_color.b(); } } // Write image data - for (unsigned y = 0; y < texture_config.height; ++y) - { + for (unsigned y = 0; y < texture_config.height; ++y) { u8* row_ptr = (u8*)buf + y * row_stride; png_write_row(png_ptr, row_ptr); } @@ -691,12 +674,15 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { png_write_end(png_ptr, nullptr); finalise: - if (info_ptr != nullptr) png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1); - if (png_ptr != nullptr) png_destroy_write_struct(&png_ptr, (png_infopp)nullptr); + if (info_ptr != nullptr) + png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1); + if (png_ptr != nullptr) + png_destroy_write_struct(&png_ptr, (png_infopp) nullptr); #endif } -static std::string ReplacePattern(const std::string& input, const std::string& pattern, const std::string& replacement) { +static std::string ReplacePattern(const std::string& input, const std::string& pattern, + const std::string& replacement) { size_t start = input.find(pattern); if (start == std::string::npos) return input; @@ -709,16 +695,16 @@ static std::string ReplacePattern(const std::string& input, const std::string& p static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfig::Source& source) { using Source = Pica::Regs::TevStageConfig::Source; static const std::map<Source, std::string> source_map = { - { Source::PrimaryColor, "PrimaryColor" }, - { Source::PrimaryFragmentColor, "PrimaryFragmentColor" }, - { Source::SecondaryFragmentColor, "SecondaryFragmentColor" }, - { Source::Texture0, "Texture0" }, - { Source::Texture1, "Texture1" }, - { Source::Texture2, "Texture2" }, - { Source::Texture3, "Texture3" }, - { Source::PreviousBuffer, "PreviousBuffer" }, - { Source::Constant, "Constant" }, - { Source::Previous, "Previous" }, + {Source::PrimaryColor, "PrimaryColor"}, + {Source::PrimaryFragmentColor, "PrimaryFragmentColor"}, + {Source::SecondaryFragmentColor, "SecondaryFragmentColor"}, + {Source::Texture0, "Texture0"}, + {Source::Texture1, "Texture1"}, + {Source::Texture2, "Texture2"}, + {Source::Texture3, "Texture3"}, + {Source::PreviousBuffer, "PreviousBuffer"}, + {Source::Constant, "Constant"}, + {Source::Previous, "Previous"}, }; const auto src_it = source_map.find(source); @@ -728,19 +714,21 @@ static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfi return src_it->second; } -static std::string GetTevStageConfigColorSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::ColorModifier modifier) { +static std::string GetTevStageConfigColorSourceString( + const Pica::Regs::TevStageConfig::Source& source, + const Pica::Regs::TevStageConfig::ColorModifier modifier) { using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier; static const std::map<ColorModifier, std::string> color_modifier_map = { - { ColorModifier::SourceColor, "%source.rgb" }, - { ColorModifier::OneMinusSourceColor, "(1.0 - %source.rgb)" }, - { ColorModifier::SourceAlpha, "%source.aaa" }, - { ColorModifier::OneMinusSourceAlpha, "(1.0 - %source.aaa)" }, - { ColorModifier::SourceRed, "%source.rrr" }, - { ColorModifier::OneMinusSourceRed, "(1.0 - %source.rrr)" }, - { ColorModifier::SourceGreen, "%source.ggg" }, - { ColorModifier::OneMinusSourceGreen, "(1.0 - %source.ggg)" }, - { ColorModifier::SourceBlue, "%source.bbb" }, - { ColorModifier::OneMinusSourceBlue, "(1.0 - %source.bbb)" }, + {ColorModifier::SourceColor, "%source.rgb"}, + {ColorModifier::OneMinusSourceColor, "(1.0 - %source.rgb)"}, + {ColorModifier::SourceAlpha, "%source.aaa"}, + {ColorModifier::OneMinusSourceAlpha, "(1.0 - %source.aaa)"}, + {ColorModifier::SourceRed, "%source.rrr"}, + {ColorModifier::OneMinusSourceRed, "(1.0 - %source.rrr)"}, + {ColorModifier::SourceGreen, "%source.ggg"}, + {ColorModifier::OneMinusSourceGreen, "(1.0 - %source.ggg)"}, + {ColorModifier::SourceBlue, "%source.bbb"}, + {ColorModifier::OneMinusSourceBlue, "(1.0 - %source.bbb)"}, }; auto src_str = GetTevStageConfigSourceString(source); @@ -752,17 +740,19 @@ static std::string GetTevStageConfigColorSourceString(const Pica::Regs::TevStage return ReplacePattern(modifier_str, "%source", src_str); } -static std::string GetTevStageConfigAlphaSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::AlphaModifier modifier) { +static std::string GetTevStageConfigAlphaSourceString( + const Pica::Regs::TevStageConfig::Source& source, + const Pica::Regs::TevStageConfig::AlphaModifier modifier) { using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier; static const std::map<AlphaModifier, std::string> alpha_modifier_map = { - { AlphaModifier::SourceAlpha, "%source.a" }, - { AlphaModifier::OneMinusSourceAlpha, "(1.0 - %source.a)" }, - { AlphaModifier::SourceRed, "%source.r" }, - { AlphaModifier::OneMinusSourceRed, "(1.0 - %source.r)" }, - { AlphaModifier::SourceGreen, "%source.g" }, - { AlphaModifier::OneMinusSourceGreen, "(1.0 - %source.g)" }, - { AlphaModifier::SourceBlue, "%source.b" }, - { AlphaModifier::OneMinusSourceBlue, "(1.0 - %source.b)" }, + {AlphaModifier::SourceAlpha, "%source.a"}, + {AlphaModifier::OneMinusSourceAlpha, "(1.0 - %source.a)"}, + {AlphaModifier::SourceRed, "%source.r"}, + {AlphaModifier::OneMinusSourceRed, "(1.0 - %source.r)"}, + {AlphaModifier::SourceGreen, "%source.g"}, + {AlphaModifier::OneMinusSourceGreen, "(1.0 - %source.g)"}, + {AlphaModifier::SourceBlue, "%source.b"}, + {AlphaModifier::OneMinusSourceBlue, "(1.0 - %source.b)"}, }; auto src_str = GetTevStageConfigSourceString(source); @@ -774,18 +764,19 @@ static std::string GetTevStageConfigAlphaSourceString(const Pica::Regs::TevStage return ReplacePattern(modifier_str, "%source", src_str); } -static std::string GetTevStageConfigOperationString(const Pica::Regs::TevStageConfig::Operation& operation) { +static std::string GetTevStageConfigOperationString( + const Pica::Regs::TevStageConfig::Operation& operation) { using Operation = Pica::Regs::TevStageConfig::Operation; static const std::map<Operation, std::string> combiner_map = { - { Operation::Replace, "%source1" }, - { Operation::Modulate, "(%source1 * %source2)" }, - { Operation::Add, "(%source1 + %source2)" }, - { Operation::AddSigned, "(%source1 + %source2) - 0.5" }, - { Operation::Lerp, "lerp(%source1, %source2, %source3)" }, - { Operation::Subtract, "(%source1 - %source2)" }, - { Operation::Dot3_RGB, "dot(%source1, %source2)" }, - { Operation::MultiplyThenAdd, "((%source1 * %source2) + %source3)" }, - { Operation::AddThenMultiply, "((%source1 + %source2) * %source3)" }, + {Operation::Replace, "%source1"}, + {Operation::Modulate, "(%source1 * %source2)"}, + {Operation::Add, "(%source1 + %source2)"}, + {Operation::AddSigned, "(%source1 + %source2) - 0.5"}, + {Operation::Lerp, "lerp(%source1, %source2, %source3)"}, + {Operation::Subtract, "(%source1 - %source2)"}, + {Operation::Dot3_RGB, "dot(%source1, %source2)"}, + {Operation::MultiplyThenAdd, "((%source1 * %source2) + %source3)"}, + {Operation::AddThenMultiply, "((%source1 + %source2) * %source3)"}, }; const auto op_it = combiner_map.find(operation); @@ -797,23 +788,37 @@ static std::string GetTevStageConfigOperationString(const Pica::Regs::TevStageCo std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage) { auto op_str = GetTevStageConfigOperationString(tev_stage.color_op); - op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigColorSourceString(tev_stage.color_source1, tev_stage.color_modifier1)); - op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigColorSourceString(tev_stage.color_source2, tev_stage.color_modifier2)); - return ReplacePattern(op_str, "%source3", GetTevStageConfigColorSourceString(tev_stage.color_source3, tev_stage.color_modifier3)); + op_str = ReplacePattern( + op_str, "%source1", + GetTevStageConfigColorSourceString(tev_stage.color_source1, tev_stage.color_modifier1)); + op_str = ReplacePattern( + op_str, "%source2", + GetTevStageConfigColorSourceString(tev_stage.color_source2, tev_stage.color_modifier2)); + return ReplacePattern( + op_str, "%source3", + GetTevStageConfigColorSourceString(tev_stage.color_source3, tev_stage.color_modifier3)); } std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage) { auto op_str = GetTevStageConfigOperationString(tev_stage.alpha_op); - op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source1, tev_stage.alpha_modifier1)); - op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source2, tev_stage.alpha_modifier2)); - return ReplacePattern(op_str, "%source3", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source3, tev_stage.alpha_modifier3)); + op_str = ReplacePattern( + op_str, "%source1", + GetTevStageConfigAlphaSourceString(tev_stage.alpha_source1, tev_stage.alpha_modifier1)); + op_str = ReplacePattern( + op_str, "%source2", + GetTevStageConfigAlphaSourceString(tev_stage.alpha_source2, tev_stage.alpha_modifier2)); + return ReplacePattern( + op_str, "%source3", + GetTevStageConfigAlphaSourceString(tev_stage.alpha_source3, tev_stage.alpha_modifier3)); } void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages) { std::string stage_info = "Tev setup:\n"; for (size_t index = 0; index < stages.size(); ++index) { const auto& tev_stage = stages[index]; - stage_info += "Stage " + std::to_string(index) + ": " + GetTevStageConfigColorCombinerString(tev_stage) + " " + GetTevStageConfigAlphaCombinerString(tev_stage) + "\n"; + stage_info += "Stage " + std::to_string(index) + ": " + + GetTevStageConfigColorCombinerString(tev_stage) + " " + + GetTevStageConfigAlphaCombinerString(tev_stage) + "\n"; } LOG_TRACE(HW_GPU, "%s", stage_info.c_str()); } diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 92e9734ae..189c93abb 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -15,10 +15,8 @@ #include <string> #include <utility> #include <vector> - #include "common/common_types.h" #include "common/vector_math.h" - #include "video_core/pica.h" namespace CiTrace { @@ -53,13 +51,16 @@ public: * Most importantly this is used for our debugger GUI. * * To implement event handling, override the OnPicaBreakPointHit and OnPicaResume methods. - * @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state access - * @todo Evaluate an alternative interface, in which there is only one managing observer and multiple child observers running (by design) on the same thread. + * @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state + * access + * @todo Evaluate an alternative interface, in which there is only one managing observer and + * multiple child observers running (by design) on the same thread. */ class BreakPointObserver { public: /// Constructs the object such that it observes events of the given DebugContext. - BreakPointObserver(std::shared_ptr<DebugContext> debug_context) : context_weak(debug_context) { + BreakPointObserver(std::shared_ptr<DebugContext> debug_context) + : context_weak(debug_context) { std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex); debug_context->breakpoint_observers.push_back(this); } @@ -84,15 +85,13 @@ public: * @param data Optional data pointer (if unused, this is a nullptr) * @note This function will perform nothing unless it is overridden in the child class. */ - virtual void OnPicaBreakPointHit(Event, void*) { - } + virtual void OnPicaBreakPointHit(Event, void*) {} /** * Action to perform when emulation is resumed from a breakpoint. * @note This function will perform nothing unless it is overridden in the child class. */ - virtual void OnPicaResume() { - } + virtual void OnPicaResume() {} protected: /** @@ -122,7 +121,8 @@ public: * The current thread then is halted until Resume() is called from another thread (or until * emulation is stopped). * @param event Event which has happened - * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until Resume() is called. + * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until + * Resume() is called. */ void OnEvent(Event event, void* data) { // This check is left in the header to allow the compiler to inline it. @@ -132,11 +132,12 @@ public: DoOnEvent(event, data); } - void DoOnEvent(Event event, void *data); + void DoOnEvent(Event event, void* data); /** * Resume from the current breakpoint. - * @warning Calling this from the same thread that OnEvent was called in will cause a deadlock. Calling from any other thread is safe. + * @warning Calling this from the same thread that OnEvent was called in will cause a deadlock. + * Calling from any other thread is safe. */ void Resume(); @@ -144,7 +145,7 @@ public: * Delete all set breakpoints and resume emulation. */ void ClearBreakpoints() { - for (auto &bp : breakpoints) { + for (auto& bp : breakpoints) { bp.enabled = false; } Resume(); @@ -182,8 +183,8 @@ namespace DebugUtils { #define PICA_LOG_TEV 0 void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, - const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes); - + const Shader::ShaderSetup& setup, + const Regs::VSOutputAttributes* output_attributes); // Utility class to log Pica commands. struct PicaTrace { @@ -216,7 +217,10 @@ struct TextureInfo { * @param source Source pointer to read data from * @param s,t Texture coordinates to read from * @param info TextureInfo object describing the texture setup - * @param disable_alpha This is used for debug widgets which use this method to display textures without providing a good way to visualize alpha by themselves. If true, this will return 255 for the alpha component, and either drop the information entirely or store it in an "unused" color channel. + * @param disable_alpha This is used for debug widgets which use this method to display textures + * without providing a good way to visualize alpha by themselves. If true, this will return 255 for + * the alpha component, and either drop the information entirely or store it in an "unused" color + * channel. * @todo Eventually we should get rid of the disable_alpha parameter. */ const Math::Vec4<u8> LookupTexture(const u8* source, int s, int t, const TextureInfo& info, @@ -237,7 +241,8 @@ class MemoryAccessTracker { /// Combine overlapping and close ranges void SimplifyRanges() { for (auto it = ranges.begin(); it != ranges.end(); ++it) { - // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too + // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, + // too auto it2 = std::next(it); while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) { it->second = std::max(it->second, it2->first + it2->second - it->first); diff --git a/src/video_core/gpu_debugger.h b/src/video_core/gpu_debugger.h index a3aab216c..3c6636d66 100644 --- a/src/video_core/gpu_debugger.h +++ b/src/video_core/gpu_debugger.h @@ -7,20 +7,16 @@ #include <algorithm> #include <functional> #include <vector> - #include "core/hle/service/gsp_gpu.h" -class GraphicsDebugger -{ +class GraphicsDebugger { public: // Base class for all objects which need to be notified about GPU events - class DebuggerObserver - { + class DebuggerObserver { public: - DebuggerObserver() : observed(nullptr) { } + DebuggerObserver() : observed(nullptr) {} - virtual ~DebuggerObserver() - { + virtual ~DebuggerObserver() { if (observed) observed->UnregisterObserver(this); } @@ -31,15 +27,13 @@ public: * @param total_command_count Total number of commands in the GX history * @note All methods in this class are called from the GSP thread */ - virtual void GXCommandProcessed(int total_command_count) - { - const GSP_GPU::Command& cmd = observed->ReadGXCommandHistory(total_command_count-1); + virtual void GXCommandProcessed(int total_command_count) { + const GSP_GPU::Command& cmd = observed->ReadGXCommandHistory(total_command_count - 1); LOG_TRACE(Debug_GPU, "Received command: id=%x", (int)cmd.id.Value()); } protected: - const GraphicsDebugger* GetDebugger() const - { + const GraphicsDebugger* GetDebugger() const { return observed; } @@ -49,8 +43,7 @@ public: friend class GraphicsDebugger; }; - void GXCommandProcessed(u8* command_data) - { + void GXCommandProcessed(u8* command_data) { if (observers.empty()) return; @@ -60,33 +53,29 @@ public: memcpy(&cmd, command_data, sizeof(GSP_GPU::Command)); ForEachObserver([this](DebuggerObserver* observer) { - observer->GXCommandProcessed(static_cast<int>(this->gx_command_history.size())); - } ); + observer->GXCommandProcessed(static_cast<int>(this->gx_command_history.size())); + }); } - const GSP_GPU::Command& ReadGXCommandHistory(int index) const - { + const GSP_GPU::Command& ReadGXCommandHistory(int index) const { // TODO: Is this thread-safe? return gx_command_history[index]; } - void RegisterObserver(DebuggerObserver* observer) - { + void RegisterObserver(DebuggerObserver* observer) { // TODO: Check for duplicates observers.push_back(observer); observer->observed = this; } - void UnregisterObserver(DebuggerObserver* observer) - { + void UnregisterObserver(DebuggerObserver* observer) { observers.erase(std::remove(observers.begin(), observers.end(), observer), observers.end()); observer->observed = nullptr; } private: - void ForEachObserver(std::function<void (DebuggerObserver*)> func) - { - std::for_each(observers.begin(),observers.end(), func); + void ForEachObserver(std::function<void(DebuggerObserver*)> func) { + std::for_each(observers.begin(), observers.end(), func); } std::vector<DebuggerObserver*> observers; diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index ec78f9593..ce2bd455e 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp @@ -6,7 +6,6 @@ #include <iterator> #include <unordered_map> #include <utility> - #include "video_core/pica.h" #include "video_core/pica_state.h" #include "video_core/primitive_assembly.h" @@ -17,466 +16,466 @@ namespace Pica { State g_state; static const std::pair<u16, const char*> register_names[] = { - { 0x010, "GPUREG_FINALIZE" }, - - { 0x040, "GPUREG_FACECULLING_CONFIG" }, - { 0x041, "GPUREG_VIEWPORT_WIDTH" }, - { 0x042, "GPUREG_VIEWPORT_INVW" }, - { 0x043, "GPUREG_VIEWPORT_HEIGHT" }, - { 0x044, "GPUREG_VIEWPORT_INVH" }, - - { 0x047, "GPUREG_FRAGOP_CLIP" }, - { 0x048, "GPUREG_FRAGOP_CLIP_DATA0" }, - { 0x049, "GPUREG_FRAGOP_CLIP_DATA1" }, - { 0x04A, "GPUREG_FRAGOP_CLIP_DATA2" }, - { 0x04B, "GPUREG_FRAGOP_CLIP_DATA3" }, - - { 0x04D, "GPUREG_DEPTHMAP_SCALE" }, - { 0x04E, "GPUREG_DEPTHMAP_OFFSET" }, - { 0x04F, "GPUREG_SH_OUTMAP_TOTAL" }, - { 0x050, "GPUREG_SH_OUTMAP_O0" }, - { 0x051, "GPUREG_SH_OUTMAP_O1" }, - { 0x052, "GPUREG_SH_OUTMAP_O2" }, - { 0x053, "GPUREG_SH_OUTMAP_O3" }, - { 0x054, "GPUREG_SH_OUTMAP_O4" }, - { 0x055, "GPUREG_SH_OUTMAP_O5" }, - { 0x056, "GPUREG_SH_OUTMAP_O6" }, - - { 0x061, "GPUREG_EARLYDEPTH_FUNC" }, - { 0x062, "GPUREG_EARLYDEPTH_TEST1" }, - { 0x063, "GPUREG_EARLYDEPTH_CLEAR" }, - { 0x064, "GPUREG_SH_OUTATTR_MODE" }, - { 0x065, "GPUREG_SCISSORTEST_MODE" }, - { 0x066, "GPUREG_SCISSORTEST_POS" }, - { 0x067, "GPUREG_SCISSORTEST_DIM" }, - { 0x068, "GPUREG_VIEWPORT_XY" }, - - { 0x06A, "GPUREG_EARLYDEPTH_DATA" }, - - { 0x06D, "GPUREG_DEPTHMAP_ENABLE" }, - { 0x06E, "GPUREG_RENDERBUF_DIM" }, - { 0x06F, "GPUREG_SH_OUTATTR_CLOCK" }, - - { 0x080, "GPUREG_TEXUNIT_CONFIG" }, - { 0x081, "GPUREG_TEXUNIT0_BORDER_COLOR" }, - { 0x082, "GPUREG_TEXUNIT0_DIM" }, - { 0x083, "GPUREG_TEXUNIT0_PARAM" }, - { 0x084, "GPUREG_TEXUNIT0_LOD" }, - { 0x085, "GPUREG_TEXUNIT0_ADDR1" }, - { 0x086, "GPUREG_TEXUNIT0_ADDR2" }, - { 0x087, "GPUREG_TEXUNIT0_ADDR3" }, - { 0x088, "GPUREG_TEXUNIT0_ADDR4" }, - { 0x089, "GPUREG_TEXUNIT0_ADDR5" }, - { 0x08A, "GPUREG_TEXUNIT0_ADDR6" }, - { 0x08B, "GPUREG_TEXUNIT0_SHADOW" }, - - { 0x08E, "GPUREG_TEXUNIT0_TYPE" }, - { 0x08F, "GPUREG_LIGHTING_ENABLE0" }, - - { 0x091, "GPUREG_TEXUNIT1_BORDER_COLOR" }, - { 0x092, "GPUREG_TEXUNIT1_DIM" }, - { 0x093, "GPUREG_TEXUNIT1_PARAM" }, - { 0x094, "GPUREG_TEXUNIT1_LOD" }, - { 0x095, "GPUREG_TEXUNIT1_ADDR" }, - { 0x096, "GPUREG_TEXUNIT1_TYPE" }, - - { 0x099, "GPUREG_TEXUNIT2_BORDER_COLOR" }, - { 0x09A, "GPUREG_TEXUNIT2_DIM" }, - { 0x09B, "GPUREG_TEXUNIT2_PARAM" }, - { 0x09C, "GPUREG_TEXUNIT2_LOD" }, - { 0x09D, "GPUREG_TEXUNIT2_ADDR" }, - { 0x09E, "GPUREG_TEXUNIT2_TYPE" }, - - { 0x0A8, "GPUREG_TEXUNIT3_PROCTEX0" }, - { 0x0A9, "GPUREG_TEXUNIT3_PROCTEX1" }, - { 0x0AA, "GPUREG_TEXUNIT3_PROCTEX2" }, - { 0x0AB, "GPUREG_TEXUNIT3_PROCTEX3" }, - { 0x0AC, "GPUREG_TEXUNIT3_PROCTEX4" }, - { 0x0AD, "GPUREG_TEXUNIT3_PROCTEX5" }, - - { 0x0AF, "GPUREG_PROCTEX_LUT" }, - { 0x0B0, "GPUREG_PROCTEX_LUT_DATA0" }, - { 0x0B1, "GPUREG_PROCTEX_LUT_DATA1" }, - { 0x0B2, "GPUREG_PROCTEX_LUT_DATA2" }, - { 0x0B3, "GPUREG_PROCTEX_LUT_DATA3" }, - { 0x0B4, "GPUREG_PROCTEX_LUT_DATA4" }, - { 0x0B5, "GPUREG_PROCTEX_LUT_DATA5" }, - { 0x0B6, "GPUREG_PROCTEX_LUT_DATA6" }, - { 0x0B7, "GPUREG_PROCTEX_LUT_DATA7" }, - - { 0x0C0, "GPUREG_TEXENV0_SOURCE" }, - { 0x0C1, "GPUREG_TEXENV0_OPERAND" }, - { 0x0C2, "GPUREG_TEXENV0_COMBINER" }, - { 0x0C3, "GPUREG_TEXENV0_COLOR" }, - { 0x0C4, "GPUREG_TEXENV0_SCALE" }, - - { 0x0C8, "GPUREG_TEXENV1_SOURCE" }, - { 0x0C9, "GPUREG_TEXENV1_OPERAND" }, - { 0x0CA, "GPUREG_TEXENV1_COMBINER" }, - { 0x0CB, "GPUREG_TEXENV1_COLOR" }, - { 0x0CC, "GPUREG_TEXENV1_SCALE" }, - - { 0x0D0, "GPUREG_TEXENV2_SOURCE" }, - { 0x0D1, "GPUREG_TEXENV2_OPERAND" }, - { 0x0D2, "GPUREG_TEXENV2_COMBINER" }, - { 0x0D3, "GPUREG_TEXENV2_COLOR" }, - { 0x0D4, "GPUREG_TEXENV2_SCALE" }, - - { 0x0D8, "GPUREG_TEXENV3_SOURCE" }, - { 0x0D9, "GPUREG_TEXENV3_OPERAND" }, - { 0x0DA, "GPUREG_TEXENV3_COMBINER" }, - { 0x0DB, "GPUREG_TEXENV3_COLOR" }, - { 0x0DC, "GPUREG_TEXENV3_SCALE" }, - - { 0x0E0, "GPUREG_TEXENV_UPDATE_BUFFER" }, - { 0x0E1, "GPUREG_FOG_COLOR" }, - - { 0x0E4, "GPUREG_GAS_ATTENUATION" }, - { 0x0E5, "GPUREG_GAS_ACCMAX" }, - { 0x0E6, "GPUREG_FOG_LUT_INDEX" }, - - { 0x0E8, "GPUREG_FOG_LUT_DATA0" }, - { 0x0E9, "GPUREG_FOG_LUT_DATA1" }, - { 0x0EA, "GPUREG_FOG_LUT_DATA2" }, - { 0x0EB, "GPUREG_FOG_LUT_DATA3" }, - { 0x0EC, "GPUREG_FOG_LUT_DATA4" }, - { 0x0ED, "GPUREG_FOG_LUT_DATA5" }, - { 0x0EE, "GPUREG_FOG_LUT_DATA6" }, - { 0x0EF, "GPUREG_FOG_LUT_DATA7" }, - { 0x0F0, "GPUREG_TEXENV4_SOURCE" }, - { 0x0F1, "GPUREG_TEXENV4_OPERAND" }, - { 0x0F2, "GPUREG_TEXENV4_COMBINER" }, - { 0x0F3, "GPUREG_TEXENV4_COLOR" }, - { 0x0F4, "GPUREG_TEXENV4_SCALE" }, - - { 0x0F8, "GPUREG_TEXENV5_SOURCE" }, - { 0x0F9, "GPUREG_TEXENV5_OPERAND" }, - { 0x0FA, "GPUREG_TEXENV5_COMBINER" }, - { 0x0FB, "GPUREG_TEXENV5_COLOR" }, - { 0x0FC, "GPUREG_TEXENV5_SCALE" }, - { 0x0FD, "GPUREG_TEXENV_BUFFER_COLOR" }, - - { 0x100, "GPUREG_COLOR_OPERATION" }, - { 0x101, "GPUREG_BLEND_FUNC" }, - { 0x102, "GPUREG_LOGIC_OP" }, - { 0x103, "GPUREG_BLEND_COLOR" }, - { 0x104, "GPUREG_FRAGOP_ALPHA_TEST" }, - { 0x105, "GPUREG_STENCIL_TEST" }, - { 0x106, "GPUREG_STENCIL_OP" }, - { 0x107, "GPUREG_DEPTH_COLOR_MASK" }, - - { 0x110, "GPUREG_FRAMEBUFFER_INVALIDATE" }, - { 0x111, "GPUREG_FRAMEBUFFER_FLUSH" }, - { 0x112, "GPUREG_COLORBUFFER_READ" }, - { 0x113, "GPUREG_COLORBUFFER_WRITE" }, - { 0x114, "GPUREG_DEPTHBUFFER_READ" }, - { 0x115, "GPUREG_DEPTHBUFFER_WRITE" }, - { 0x116, "GPUREG_DEPTHBUFFER_FORMAT" }, - { 0x117, "GPUREG_COLORBUFFER_FORMAT" }, - { 0x118, "GPUREG_EARLYDEPTH_TEST2" }, - - { 0x11B, "GPUREG_FRAMEBUFFER_BLOCK32" }, - { 0x11C, "GPUREG_DEPTHBUFFER_LOC" }, - { 0x11D, "GPUREG_COLORBUFFER_LOC" }, - { 0x11E, "GPUREG_FRAMEBUFFER_DIM" }, - - { 0x120, "GPUREG_GAS_LIGHT_XY" }, - { 0x121, "GPUREG_GAS_LIGHT_Z" }, - { 0x122, "GPUREG_GAS_LIGHT_Z_COLOR" }, - { 0x123, "GPUREG_GAS_LUT_INDEX" }, - { 0x124, "GPUREG_GAS_LUT_DATA" }, - - { 0x126, "GPUREG_GAS_DELTAZ_DEPTH" }, - - { 0x130, "GPUREG_FRAGOP_SHADOW" }, - - { 0x140, "GPUREG_LIGHT0_SPECULAR0" }, - { 0x141, "GPUREG_LIGHT0_SPECULAR1" }, - { 0x142, "GPUREG_LIGHT0_DIFFUSE" }, - { 0x143, "GPUREG_LIGHT0_AMBIENT" }, - { 0x144, "GPUREG_LIGHT0_XY" }, - { 0x145, "GPUREG_LIGHT0_Z" }, - { 0x146, "GPUREG_LIGHT0_SPOTDIR_XY" }, - { 0x147, "GPUREG_LIGHT0_SPOTDIR_Z" }, - - { 0x149, "GPUREG_LIGHT0_CONFIG" }, - { 0x14A, "GPUREG_LIGHT0_ATTENUATION_BIAS" }, - { 0x14B, "GPUREG_LIGHT0_ATTENUATION_SCALE" }, - - { 0x150, "GPUREG_LIGHT1_SPECULAR0" }, - { 0x151, "GPUREG_LIGHT1_SPECULAR1" }, - { 0x152, "GPUREG_LIGHT1_DIFFUSE" }, - { 0x153, "GPUREG_LIGHT1_AMBIENT" }, - { 0x154, "GPUREG_LIGHT1_XY" }, - { 0x155, "GPUREG_LIGHT1_Z" }, - { 0x156, "GPUREG_LIGHT1_SPOTDIR_XY" }, - { 0x157, "GPUREG_LIGHT1_SPOTDIR_Z" }, - - { 0x159, "GPUREG_LIGHT1_CONFIG" }, - { 0x15A, "GPUREG_LIGHT1_ATTENUATION_BIAS" }, - { 0x15B, "GPUREG_LIGHT1_ATTENUATION_SCALE" }, - - { 0x160, "GPUREG_LIGHT2_SPECULAR0" }, - { 0x161, "GPUREG_LIGHT2_SPECULAR1" }, - { 0x162, "GPUREG_LIGHT2_DIFFUSE" }, - { 0x163, "GPUREG_LIGHT2_AMBIENT" }, - { 0x164, "GPUREG_LIGHT2_XY" }, - { 0x165, "GPUREG_LIGHT2_Z" }, - { 0x166, "GPUREG_LIGHT2_SPOTDIR_XY" }, - { 0x167, "GPUREG_LIGHT2_SPOTDIR_Z" }, - - { 0x169, "GPUREG_LIGHT2_CONFIG" }, - { 0x16A, "GPUREG_LIGHT2_ATTENUATION_BIAS" }, - { 0x16B, "GPUREG_LIGHT2_ATTENUATION_SCALE" }, - - { 0x170, "GPUREG_LIGHT3_SPECULAR0" }, - { 0x171, "GPUREG_LIGHT3_SPECULAR1" }, - { 0x172, "GPUREG_LIGHT3_DIFFUSE" }, - { 0x173, "GPUREG_LIGHT3_AMBIENT" }, - { 0x174, "GPUREG_LIGHT3_XY" }, - { 0x175, "GPUREG_LIGHT3_Z" }, - { 0x176, "GPUREG_LIGHT3_SPOTDIR_XY" }, - { 0x177, "GPUREG_LIGHT3_SPOTDIR_Z" }, - - { 0x179, "GPUREG_LIGHT3_CONFIG" }, - { 0x17A, "GPUREG_LIGHT3_ATTENUATION_BIAS" }, - { 0x17B, "GPUREG_LIGHT3_ATTENUATION_SCALE" }, - - { 0x180, "GPUREG_LIGHT4_SPECULAR0" }, - { 0x181, "GPUREG_LIGHT4_SPECULAR1" }, - { 0x182, "GPUREG_LIGHT4_DIFFUSE" }, - { 0x183, "GPUREG_LIGHT4_AMBIENT" }, - { 0x184, "GPUREG_LIGHT4_XY" }, - { 0x185, "GPUREG_LIGHT4_Z" }, - { 0x186, "GPUREG_LIGHT4_SPOTDIR_XY" }, - { 0x187, "GPUREG_LIGHT4_SPOTDIR_Z" }, - - { 0x189, "GPUREG_LIGHT4_CONFIG" }, - { 0x18A, "GPUREG_LIGHT4_ATTENUATION_BIAS" }, - { 0x18B, "GPUREG_LIGHT4_ATTENUATION_SCALE" }, - - { 0x190, "GPUREG_LIGHT5_SPECULAR0" }, - { 0x191, "GPUREG_LIGHT5_SPECULAR1" }, - { 0x192, "GPUREG_LIGHT5_DIFFUSE" }, - { 0x193, "GPUREG_LIGHT5_AMBIENT" }, - { 0x194, "GPUREG_LIGHT5_XY" }, - { 0x195, "GPUREG_LIGHT5_Z" }, - { 0x196, "GPUREG_LIGHT5_SPOTDIR_XY" }, - { 0x197, "GPUREG_LIGHT5_SPOTDIR_Z" }, - - { 0x199, "GPUREG_LIGHT5_CONFIG" }, - { 0x19A, "GPUREG_LIGHT5_ATTENUATION_BIAS" }, - { 0x19B, "GPUREG_LIGHT5_ATTENUATION_SCALE" }, - - { 0x1A0, "GPUREG_LIGHT6_SPECULAR0" }, - { 0x1A1, "GPUREG_LIGHT6_SPECULAR1" }, - { 0x1A2, "GPUREG_LIGHT6_DIFFUSE" }, - { 0x1A3, "GPUREG_LIGHT6_AMBIENT" }, - { 0x1A4, "GPUREG_LIGHT6_XY" }, - { 0x1A5, "GPUREG_LIGHT6_Z" }, - { 0x1A6, "GPUREG_LIGHT6_SPOTDIR_XY" }, - { 0x1A7, "GPUREG_LIGHT6_SPOTDIR_Z" }, - - { 0x1A9, "GPUREG_LIGHT6_CONFIG" }, - { 0x1AA, "GPUREG_LIGHT6_ATTENUATION_BIAS" }, - { 0x1AB, "GPUREG_LIGHT6_ATTENUATION_SCALE" }, - - { 0x1B0, "GPUREG_LIGHT7_SPECULAR0" }, - { 0x1B1, "GPUREG_LIGHT7_SPECULAR1" }, - { 0x1B2, "GPUREG_LIGHT7_DIFFUSE" }, - { 0x1B3, "GPUREG_LIGHT7_AMBIENT" }, - { 0x1B4, "GPUREG_LIGHT7_XY" }, - { 0x1B5, "GPUREG_LIGHT7_Z" }, - { 0x1B6, "GPUREG_LIGHT7_SPOTDIR_XY" }, - { 0x1B7, "GPUREG_LIGHT7_SPOTDIR_Z" }, - - { 0x1B9, "GPUREG_LIGHT7_CONFIG" }, - { 0x1BA, "GPUREG_LIGHT7_ATTENUATION_BIAS" }, - { 0x1BB, "GPUREG_LIGHT7_ATTENUATION_SCALE" }, - - { 0x1C0, "GPUREG_LIGHTING_AMBIENT" }, - - { 0x1C2, "GPUREG_LIGHTING_NUM_LIGHTS" }, - { 0x1C3, "GPUREG_LIGHTING_CONFIG0" }, - { 0x1C4, "GPUREG_LIGHTING_CONFIG1" }, - { 0x1C5, "GPUREG_LIGHTING_LUT_INDEX" }, - { 0x1C6, "GPUREG_LIGHTING_ENABLE1" }, - - { 0x1C8, "GPUREG_LIGHTING_LUT_DATA0" }, - { 0x1C9, "GPUREG_LIGHTING_LUT_DATA1" }, - { 0x1CA, "GPUREG_LIGHTING_LUT_DATA2" }, - { 0x1CB, "GPUREG_LIGHTING_LUT_DATA3" }, - { 0x1CC, "GPUREG_LIGHTING_LUT_DATA4" }, - { 0x1CD, "GPUREG_LIGHTING_LUT_DATA5" }, - { 0x1CE, "GPUREG_LIGHTING_LUT_DATA6" }, - { 0x1CF, "GPUREG_LIGHTING_LUT_DATA7" }, - { 0x1D0, "GPUREG_LIGHTING_LUTINPUT_ABS" }, - { 0x1D1, "GPUREG_LIGHTING_LUTINPUT_SELECT" }, - { 0x1D2, "GPUREG_LIGHTING_LUTINPUT_SCALE" }, - - { 0x1D9, "GPUREG_LIGHTING_LIGHT_PERMUTATION" }, - - { 0x200, "GPUREG_ATTRIBBUFFERS_LOC" }, - { 0x201, "GPUREG_ATTRIBBUFFERS_FORMAT_LOW" }, - { 0x202, "GPUREG_ATTRIBBUFFERS_FORMAT_HIGH" }, - { 0x203, "GPUREG_ATTRIBBUFFER0_OFFSET" }, - { 0x204, "GPUREG_ATTRIBBUFFER0_CONFIG1" }, - { 0x205, "GPUREG_ATTRIBBUFFER0_CONFIG2" }, - { 0x206, "GPUREG_ATTRIBBUFFER1_OFFSET" }, - { 0x207, "GPUREG_ATTRIBBUFFER1_CONFIG1" }, - { 0x208, "GPUREG_ATTRIBBUFFER1_CONFIG2" }, - { 0x209, "GPUREG_ATTRIBBUFFER2_OFFSET" }, - { 0x20A, "GPUREG_ATTRIBBUFFER2_CONFIG1" }, - { 0x20B, "GPUREG_ATTRIBBUFFER2_CONFIG2" }, - { 0x20C, "GPUREG_ATTRIBBUFFER3_OFFSET" }, - { 0x20D, "GPUREG_ATTRIBBUFFER3_CONFIG1" }, - { 0x20E, "GPUREG_ATTRIBBUFFER3_CONFIG2" }, - { 0x20F, "GPUREG_ATTRIBBUFFER4_OFFSET" }, - { 0x210, "GPUREG_ATTRIBBUFFER4_CONFIG1" }, - { 0x211, "GPUREG_ATTRIBBUFFER4_CONFIG2" }, - { 0x212, "GPUREG_ATTRIBBUFFER5_OFFSET" }, - { 0x213, "GPUREG_ATTRIBBUFFER5_CONFIG1" }, - { 0x214, "GPUREG_ATTRIBBUFFER5_CONFIG2" }, - { 0x215, "GPUREG_ATTRIBBUFFER6_OFFSET" }, - { 0x216, "GPUREG_ATTRIBBUFFER6_CONFIG1" }, - { 0x217, "GPUREG_ATTRIBBUFFER6_CONFIG2" }, - { 0x218, "GPUREG_ATTRIBBUFFER7_OFFSET" }, - { 0x219, "GPUREG_ATTRIBBUFFER7_CONFIG1" }, - { 0x21A, "GPUREG_ATTRIBBUFFER7_CONFIG2" }, - { 0x21B, "GPUREG_ATTRIBBUFFER8_OFFSET" }, - { 0x21C, "GPUREG_ATTRIBBUFFER8_CONFIG1" }, - { 0x21D, "GPUREG_ATTRIBBUFFER8_CONFIG2" }, - { 0x21E, "GPUREG_ATTRIBBUFFER9_OFFSET" }, - { 0x21F, "GPUREG_ATTRIBBUFFER9_CONFIG1" }, - { 0x220, "GPUREG_ATTRIBBUFFER9_CONFIG2" }, - { 0x221, "GPUREG_ATTRIBBUFFER10_OFFSET" }, - { 0x222, "GPUREG_ATTRIBBUFFER10_CONFIG1" }, - { 0x223, "GPUREG_ATTRIBBUFFER10_CONFIG2" }, - { 0x224, "GPUREG_ATTRIBBUFFER11_OFFSET" }, - { 0x225, "GPUREG_ATTRIBBUFFER11_CONFIG1" }, - { 0x226, "GPUREG_ATTRIBBUFFER11_CONFIG2" }, - { 0x227, "GPUREG_INDEXBUFFER_CONFIG" }, - { 0x228, "GPUREG_NUMVERTICES" }, - { 0x229, "GPUREG_GEOSTAGE_CONFIG" }, - { 0x22A, "GPUREG_VERTEX_OFFSET" }, - - { 0x22D, "GPUREG_POST_VERTEX_CACHE_NUM" }, - { 0x22E, "GPUREG_DRAWARRAYS" }, - { 0x22F, "GPUREG_DRAWELEMENTS" }, - - { 0x231, "GPUREG_VTX_FUNC" }, - { 0x232, "GPUREG_FIXEDATTRIB_INDEX" }, - { 0x233, "GPUREG_FIXEDATTRIB_DATA0" }, - { 0x234, "GPUREG_FIXEDATTRIB_DATA1" }, - { 0x235, "GPUREG_FIXEDATTRIB_DATA2" }, - - { 0x238, "GPUREG_CMDBUF_SIZE0" }, - { 0x239, "GPUREG_CMDBUF_SIZE1" }, - { 0x23A, "GPUREG_CMDBUF_ADDR0" }, - { 0x23B, "GPUREG_CMDBUF_ADDR1" }, - { 0x23C, "GPUREG_CMDBUF_JUMP0" }, - { 0x23D, "GPUREG_CMDBUF_JUMP1" }, - - { 0x242, "GPUREG_VSH_NUM_ATTR" }, - - { 0x244, "GPUREG_VSH_COM_MODE" }, - { 0x245, "GPUREG_START_DRAW_FUNC0" }, - - { 0x24A, "GPUREG_VSH_OUTMAP_TOTAL1" }, - - { 0x251, "GPUREG_VSH_OUTMAP_TOTAL2" }, - { 0x252, "GPUREG_GSH_MISC0" }, - { 0x253, "GPUREG_GEOSTAGE_CONFIG2" }, - { 0x254, "GPUREG_GSH_MISC1" }, - - { 0x25E, "GPUREG_PRIMITIVE_CONFIG" }, - { 0x25F, "GPUREG_RESTART_PRIMITIVE" }, - - { 0x280, "GPUREG_GSH_BOOLUNIFORM" }, - { 0x281, "GPUREG_GSH_INTUNIFORM_I0" }, - { 0x282, "GPUREG_GSH_INTUNIFORM_I1" }, - { 0x283, "GPUREG_GSH_INTUNIFORM_I2" }, - { 0x284, "GPUREG_GSH_INTUNIFORM_I3" }, - - { 0x289, "GPUREG_GSH_INPUTBUFFER_CONFIG" }, - { 0x28A, "GPUREG_GSH_ENTRYPOINT" }, - { 0x28B, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW" }, - { 0x28C, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_HIGH" }, - { 0x28D, "GPUREG_GSH_OUTMAP_MASK" }, - - { 0x28F, "GPUREG_GSH_CODETRANSFER_END" }, - { 0x290, "GPUREG_GSH_FLOATUNIFORM_INDEX" }, - { 0x291, "GPUREG_GSH_FLOATUNIFORM_DATA0" }, - { 0x292, "GPUREG_GSH_FLOATUNIFORM_DATA1" }, - { 0x293, "GPUREG_GSH_FLOATUNIFORM_DATA2" }, - { 0x294, "GPUREG_GSH_FLOATUNIFORM_DATA3" }, - { 0x295, "GPUREG_GSH_FLOATUNIFORM_DATA4" }, - { 0x296, "GPUREG_GSH_FLOATUNIFORM_DATA5" }, - { 0x297, "GPUREG_GSH_FLOATUNIFORM_DATA6" }, - { 0x298, "GPUREG_GSH_FLOATUNIFORM_DATA7" }, - - { 0x29B, "GPUREG_GSH_CODETRANSFER_INDEX" }, - { 0x29C, "GPUREG_GSH_CODETRANSFER_DATA0" }, - { 0x29D, "GPUREG_GSH_CODETRANSFER_DATA1" }, - { 0x29E, "GPUREG_GSH_CODETRANSFER_DATA2" }, - { 0x29F, "GPUREG_GSH_CODETRANSFER_DATA3" }, - { 0x2A0, "GPUREG_GSH_CODETRANSFER_DATA4" }, - { 0x2A1, "GPUREG_GSH_CODETRANSFER_DATA5" }, - { 0x2A2, "GPUREG_GSH_CODETRANSFER_DATA6" }, - { 0x2A3, "GPUREG_GSH_CODETRANSFER_DATA7" }, - - { 0x2A5, "GPUREG_GSH_OPDESCS_INDEX" }, - { 0x2A6, "GPUREG_GSH_OPDESCS_DATA0" }, - { 0x2A7, "GPUREG_GSH_OPDESCS_DATA1" }, - { 0x2A8, "GPUREG_GSH_OPDESCS_DATA2" }, - { 0x2A9, "GPUREG_GSH_OPDESCS_DATA3" }, - { 0x2AA, "GPUREG_GSH_OPDESCS_DATA4" }, - { 0x2AB, "GPUREG_GSH_OPDESCS_DATA5" }, - { 0x2AC, "GPUREG_GSH_OPDESCS_DATA6" }, - { 0x2AD, "GPUREG_GSH_OPDESCS_DATA7" }, - - { 0x2B0, "GPUREG_VSH_BOOLUNIFORM" }, - { 0x2B1, "GPUREG_VSH_INTUNIFORM_I0" }, - { 0x2B2, "GPUREG_VSH_INTUNIFORM_I1" }, - { 0x2B3, "GPUREG_VSH_INTUNIFORM_I2" }, - { 0x2B4, "GPUREG_VSH_INTUNIFORM_I3" }, - - { 0x2B9, "GPUREG_VSH_INPUTBUFFER_CONFIG" }, - { 0x2BA, "GPUREG_VSH_ENTRYPOINT" }, - { 0x2BB, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW" }, - { 0x2BC, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_HIGH" }, - { 0x2BD, "GPUREG_VSH_OUTMAP_MASK" }, - - { 0x2BF, "GPUREG_VSH_CODETRANSFER_END" }, - { 0x2C0, "GPUREG_VSH_FLOATUNIFORM_INDEX" }, - { 0x2C1, "GPUREG_VSH_FLOATUNIFORM_DATA0" }, - { 0x2C2, "GPUREG_VSH_FLOATUNIFORM_DATA1" }, - { 0x2C3, "GPUREG_VSH_FLOATUNIFORM_DATA2" }, - { 0x2C4, "GPUREG_VSH_FLOATUNIFORM_DATA3" }, - { 0x2C5, "GPUREG_VSH_FLOATUNIFORM_DATA4" }, - { 0x2C6, "GPUREG_VSH_FLOATUNIFORM_DATA5" }, - { 0x2C7, "GPUREG_VSH_FLOATUNIFORM_DATA6" }, - { 0x2C8, "GPUREG_VSH_FLOATUNIFORM_DATA7" }, - - { 0x2CB, "GPUREG_VSH_CODETRANSFER_INDEX" }, - { 0x2CC, "GPUREG_VSH_CODETRANSFER_DATA0" }, - { 0x2CD, "GPUREG_VSH_CODETRANSFER_DATA1" }, - { 0x2CE, "GPUREG_VSH_CODETRANSFER_DATA2" }, - { 0x2CF, "GPUREG_VSH_CODETRANSFER_DATA3" }, - { 0x2D0, "GPUREG_VSH_CODETRANSFER_DATA4" }, - { 0x2D1, "GPUREG_VSH_CODETRANSFER_DATA5" }, - { 0x2D2, "GPUREG_VSH_CODETRANSFER_DATA6" }, - { 0x2D3, "GPUREG_VSH_CODETRANSFER_DATA7" }, - - { 0x2D5, "GPUREG_VSH_OPDESCS_INDEX" }, - { 0x2D6, "GPUREG_VSH_OPDESCS_DATA0" }, - { 0x2D7, "GPUREG_VSH_OPDESCS_DATA1" }, - { 0x2D8, "GPUREG_VSH_OPDESCS_DATA2" }, - { 0x2D9, "GPUREG_VSH_OPDESCS_DATA3" }, - { 0x2DA, "GPUREG_VSH_OPDESCS_DATA4" }, - { 0x2DB, "GPUREG_VSH_OPDESCS_DATA5" }, - { 0x2DC, "GPUREG_VSH_OPDESCS_DATA6" }, - { 0x2DD, "GPUREG_VSH_OPDESCS_DATA7" }, + {0x010, "GPUREG_FINALIZE"}, + + {0x040, "GPUREG_FACECULLING_CONFIG"}, + {0x041, "GPUREG_VIEWPORT_WIDTH"}, + {0x042, "GPUREG_VIEWPORT_INVW"}, + {0x043, "GPUREG_VIEWPORT_HEIGHT"}, + {0x044, "GPUREG_VIEWPORT_INVH"}, + + {0x047, "GPUREG_FRAGOP_CLIP"}, + {0x048, "GPUREG_FRAGOP_CLIP_DATA0"}, + {0x049, "GPUREG_FRAGOP_CLIP_DATA1"}, + {0x04A, "GPUREG_FRAGOP_CLIP_DATA2"}, + {0x04B, "GPUREG_FRAGOP_CLIP_DATA3"}, + + {0x04D, "GPUREG_DEPTHMAP_SCALE"}, + {0x04E, "GPUREG_DEPTHMAP_OFFSET"}, + {0x04F, "GPUREG_SH_OUTMAP_TOTAL"}, + {0x050, "GPUREG_SH_OUTMAP_O0"}, + {0x051, "GPUREG_SH_OUTMAP_O1"}, + {0x052, "GPUREG_SH_OUTMAP_O2"}, + {0x053, "GPUREG_SH_OUTMAP_O3"}, + {0x054, "GPUREG_SH_OUTMAP_O4"}, + {0x055, "GPUREG_SH_OUTMAP_O5"}, + {0x056, "GPUREG_SH_OUTMAP_O6"}, + + {0x061, "GPUREG_EARLYDEPTH_FUNC"}, + {0x062, "GPUREG_EARLYDEPTH_TEST1"}, + {0x063, "GPUREG_EARLYDEPTH_CLEAR"}, + {0x064, "GPUREG_SH_OUTATTR_MODE"}, + {0x065, "GPUREG_SCISSORTEST_MODE"}, + {0x066, "GPUREG_SCISSORTEST_POS"}, + {0x067, "GPUREG_SCISSORTEST_DIM"}, + {0x068, "GPUREG_VIEWPORT_XY"}, + + {0x06A, "GPUREG_EARLYDEPTH_DATA"}, + + {0x06D, "GPUREG_DEPTHMAP_ENABLE"}, + {0x06E, "GPUREG_RENDERBUF_DIM"}, + {0x06F, "GPUREG_SH_OUTATTR_CLOCK"}, + + {0x080, "GPUREG_TEXUNIT_CONFIG"}, + {0x081, "GPUREG_TEXUNIT0_BORDER_COLOR"}, + {0x082, "GPUREG_TEXUNIT0_DIM"}, + {0x083, "GPUREG_TEXUNIT0_PARAM"}, + {0x084, "GPUREG_TEXUNIT0_LOD"}, + {0x085, "GPUREG_TEXUNIT0_ADDR1"}, + {0x086, "GPUREG_TEXUNIT0_ADDR2"}, + {0x087, "GPUREG_TEXUNIT0_ADDR3"}, + {0x088, "GPUREG_TEXUNIT0_ADDR4"}, + {0x089, "GPUREG_TEXUNIT0_ADDR5"}, + {0x08A, "GPUREG_TEXUNIT0_ADDR6"}, + {0x08B, "GPUREG_TEXUNIT0_SHADOW"}, + + {0x08E, "GPUREG_TEXUNIT0_TYPE"}, + {0x08F, "GPUREG_LIGHTING_ENABLE0"}, + + {0x091, "GPUREG_TEXUNIT1_BORDER_COLOR"}, + {0x092, "GPUREG_TEXUNIT1_DIM"}, + {0x093, "GPUREG_TEXUNIT1_PARAM"}, + {0x094, "GPUREG_TEXUNIT1_LOD"}, + {0x095, "GPUREG_TEXUNIT1_ADDR"}, + {0x096, "GPUREG_TEXUNIT1_TYPE"}, + + {0x099, "GPUREG_TEXUNIT2_BORDER_COLOR"}, + {0x09A, "GPUREG_TEXUNIT2_DIM"}, + {0x09B, "GPUREG_TEXUNIT2_PARAM"}, + {0x09C, "GPUREG_TEXUNIT2_LOD"}, + {0x09D, "GPUREG_TEXUNIT2_ADDR"}, + {0x09E, "GPUREG_TEXUNIT2_TYPE"}, + + {0x0A8, "GPUREG_TEXUNIT3_PROCTEX0"}, + {0x0A9, "GPUREG_TEXUNIT3_PROCTEX1"}, + {0x0AA, "GPUREG_TEXUNIT3_PROCTEX2"}, + {0x0AB, "GPUREG_TEXUNIT3_PROCTEX3"}, + {0x0AC, "GPUREG_TEXUNIT3_PROCTEX4"}, + {0x0AD, "GPUREG_TEXUNIT3_PROCTEX5"}, + + {0x0AF, "GPUREG_PROCTEX_LUT"}, + {0x0B0, "GPUREG_PROCTEX_LUT_DATA0"}, + {0x0B1, "GPUREG_PROCTEX_LUT_DATA1"}, + {0x0B2, "GPUREG_PROCTEX_LUT_DATA2"}, + {0x0B3, "GPUREG_PROCTEX_LUT_DATA3"}, + {0x0B4, "GPUREG_PROCTEX_LUT_DATA4"}, + {0x0B5, "GPUREG_PROCTEX_LUT_DATA5"}, + {0x0B6, "GPUREG_PROCTEX_LUT_DATA6"}, + {0x0B7, "GPUREG_PROCTEX_LUT_DATA7"}, + + {0x0C0, "GPUREG_TEXENV0_SOURCE"}, + {0x0C1, "GPUREG_TEXENV0_OPERAND"}, + {0x0C2, "GPUREG_TEXENV0_COMBINER"}, + {0x0C3, "GPUREG_TEXENV0_COLOR"}, + {0x0C4, "GPUREG_TEXENV0_SCALE"}, + + {0x0C8, "GPUREG_TEXENV1_SOURCE"}, + {0x0C9, "GPUREG_TEXENV1_OPERAND"}, + {0x0CA, "GPUREG_TEXENV1_COMBINER"}, + {0x0CB, "GPUREG_TEXENV1_COLOR"}, + {0x0CC, "GPUREG_TEXENV1_SCALE"}, + + {0x0D0, "GPUREG_TEXENV2_SOURCE"}, + {0x0D1, "GPUREG_TEXENV2_OPERAND"}, + {0x0D2, "GPUREG_TEXENV2_COMBINER"}, + {0x0D3, "GPUREG_TEXENV2_COLOR"}, + {0x0D4, "GPUREG_TEXENV2_SCALE"}, + + {0x0D8, "GPUREG_TEXENV3_SOURCE"}, + {0x0D9, "GPUREG_TEXENV3_OPERAND"}, + {0x0DA, "GPUREG_TEXENV3_COMBINER"}, + {0x0DB, "GPUREG_TEXENV3_COLOR"}, + {0x0DC, "GPUREG_TEXENV3_SCALE"}, + + {0x0E0, "GPUREG_TEXENV_UPDATE_BUFFER"}, + {0x0E1, "GPUREG_FOG_COLOR"}, + + {0x0E4, "GPUREG_GAS_ATTENUATION"}, + {0x0E5, "GPUREG_GAS_ACCMAX"}, + {0x0E6, "GPUREG_FOG_LUT_INDEX"}, + + {0x0E8, "GPUREG_FOG_LUT_DATA0"}, + {0x0E9, "GPUREG_FOG_LUT_DATA1"}, + {0x0EA, "GPUREG_FOG_LUT_DATA2"}, + {0x0EB, "GPUREG_FOG_LUT_DATA3"}, + {0x0EC, "GPUREG_FOG_LUT_DATA4"}, + {0x0ED, "GPUREG_FOG_LUT_DATA5"}, + {0x0EE, "GPUREG_FOG_LUT_DATA6"}, + {0x0EF, "GPUREG_FOG_LUT_DATA7"}, + {0x0F0, "GPUREG_TEXENV4_SOURCE"}, + {0x0F1, "GPUREG_TEXENV4_OPERAND"}, + {0x0F2, "GPUREG_TEXENV4_COMBINER"}, + {0x0F3, "GPUREG_TEXENV4_COLOR"}, + {0x0F4, "GPUREG_TEXENV4_SCALE"}, + + {0x0F8, "GPUREG_TEXENV5_SOURCE"}, + {0x0F9, "GPUREG_TEXENV5_OPERAND"}, + {0x0FA, "GPUREG_TEXENV5_COMBINER"}, + {0x0FB, "GPUREG_TEXENV5_COLOR"}, + {0x0FC, "GPUREG_TEXENV5_SCALE"}, + {0x0FD, "GPUREG_TEXENV_BUFFER_COLOR"}, + + {0x100, "GPUREG_COLOR_OPERATION"}, + {0x101, "GPUREG_BLEND_FUNC"}, + {0x102, "GPUREG_LOGIC_OP"}, + {0x103, "GPUREG_BLEND_COLOR"}, + {0x104, "GPUREG_FRAGOP_ALPHA_TEST"}, + {0x105, "GPUREG_STENCIL_TEST"}, + {0x106, "GPUREG_STENCIL_OP"}, + {0x107, "GPUREG_DEPTH_COLOR_MASK"}, + + {0x110, "GPUREG_FRAMEBUFFER_INVALIDATE"}, + {0x111, "GPUREG_FRAMEBUFFER_FLUSH"}, + {0x112, "GPUREG_COLORBUFFER_READ"}, + {0x113, "GPUREG_COLORBUFFER_WRITE"}, + {0x114, "GPUREG_DEPTHBUFFER_READ"}, + {0x115, "GPUREG_DEPTHBUFFER_WRITE"}, + {0x116, "GPUREG_DEPTHBUFFER_FORMAT"}, + {0x117, "GPUREG_COLORBUFFER_FORMAT"}, + {0x118, "GPUREG_EARLYDEPTH_TEST2"}, + + {0x11B, "GPUREG_FRAMEBUFFER_BLOCK32"}, + {0x11C, "GPUREG_DEPTHBUFFER_LOC"}, + {0x11D, "GPUREG_COLORBUFFER_LOC"}, + {0x11E, "GPUREG_FRAMEBUFFER_DIM"}, + + {0x120, "GPUREG_GAS_LIGHT_XY"}, + {0x121, "GPUREG_GAS_LIGHT_Z"}, + {0x122, "GPUREG_GAS_LIGHT_Z_COLOR"}, + {0x123, "GPUREG_GAS_LUT_INDEX"}, + {0x124, "GPUREG_GAS_LUT_DATA"}, + + {0x126, "GPUREG_GAS_DELTAZ_DEPTH"}, + + {0x130, "GPUREG_FRAGOP_SHADOW"}, + + {0x140, "GPUREG_LIGHT0_SPECULAR0"}, + {0x141, "GPUREG_LIGHT0_SPECULAR1"}, + {0x142, "GPUREG_LIGHT0_DIFFUSE"}, + {0x143, "GPUREG_LIGHT0_AMBIENT"}, + {0x144, "GPUREG_LIGHT0_XY"}, + {0x145, "GPUREG_LIGHT0_Z"}, + {0x146, "GPUREG_LIGHT0_SPOTDIR_XY"}, + {0x147, "GPUREG_LIGHT0_SPOTDIR_Z"}, + + {0x149, "GPUREG_LIGHT0_CONFIG"}, + {0x14A, "GPUREG_LIGHT0_ATTENUATION_BIAS"}, + {0x14B, "GPUREG_LIGHT0_ATTENUATION_SCALE"}, + + {0x150, "GPUREG_LIGHT1_SPECULAR0"}, + {0x151, "GPUREG_LIGHT1_SPECULAR1"}, + {0x152, "GPUREG_LIGHT1_DIFFUSE"}, + {0x153, "GPUREG_LIGHT1_AMBIENT"}, + {0x154, "GPUREG_LIGHT1_XY"}, + {0x155, "GPUREG_LIGHT1_Z"}, + {0x156, "GPUREG_LIGHT1_SPOTDIR_XY"}, + {0x157, "GPUREG_LIGHT1_SPOTDIR_Z"}, + + {0x159, "GPUREG_LIGHT1_CONFIG"}, + {0x15A, "GPUREG_LIGHT1_ATTENUATION_BIAS"}, + {0x15B, "GPUREG_LIGHT1_ATTENUATION_SCALE"}, + + {0x160, "GPUREG_LIGHT2_SPECULAR0"}, + {0x161, "GPUREG_LIGHT2_SPECULAR1"}, + {0x162, "GPUREG_LIGHT2_DIFFUSE"}, + {0x163, "GPUREG_LIGHT2_AMBIENT"}, + {0x164, "GPUREG_LIGHT2_XY"}, + {0x165, "GPUREG_LIGHT2_Z"}, + {0x166, "GPUREG_LIGHT2_SPOTDIR_XY"}, + {0x167, "GPUREG_LIGHT2_SPOTDIR_Z"}, + + {0x169, "GPUREG_LIGHT2_CONFIG"}, + {0x16A, "GPUREG_LIGHT2_ATTENUATION_BIAS"}, + {0x16B, "GPUREG_LIGHT2_ATTENUATION_SCALE"}, + + {0x170, "GPUREG_LIGHT3_SPECULAR0"}, + {0x171, "GPUREG_LIGHT3_SPECULAR1"}, + {0x172, "GPUREG_LIGHT3_DIFFUSE"}, + {0x173, "GPUREG_LIGHT3_AMBIENT"}, + {0x174, "GPUREG_LIGHT3_XY"}, + {0x175, "GPUREG_LIGHT3_Z"}, + {0x176, "GPUREG_LIGHT3_SPOTDIR_XY"}, + {0x177, "GPUREG_LIGHT3_SPOTDIR_Z"}, + + {0x179, "GPUREG_LIGHT3_CONFIG"}, + {0x17A, "GPUREG_LIGHT3_ATTENUATION_BIAS"}, + {0x17B, "GPUREG_LIGHT3_ATTENUATION_SCALE"}, + + {0x180, "GPUREG_LIGHT4_SPECULAR0"}, + {0x181, "GPUREG_LIGHT4_SPECULAR1"}, + {0x182, "GPUREG_LIGHT4_DIFFUSE"}, + {0x183, "GPUREG_LIGHT4_AMBIENT"}, + {0x184, "GPUREG_LIGHT4_XY"}, + {0x185, "GPUREG_LIGHT4_Z"}, + {0x186, "GPUREG_LIGHT4_SPOTDIR_XY"}, + {0x187, "GPUREG_LIGHT4_SPOTDIR_Z"}, + + {0x189, "GPUREG_LIGHT4_CONFIG"}, + {0x18A, "GPUREG_LIGHT4_ATTENUATION_BIAS"}, + {0x18B, "GPUREG_LIGHT4_ATTENUATION_SCALE"}, + + {0x190, "GPUREG_LIGHT5_SPECULAR0"}, + {0x191, "GPUREG_LIGHT5_SPECULAR1"}, + {0x192, "GPUREG_LIGHT5_DIFFUSE"}, + {0x193, "GPUREG_LIGHT5_AMBIENT"}, + {0x194, "GPUREG_LIGHT5_XY"}, + {0x195, "GPUREG_LIGHT5_Z"}, + {0x196, "GPUREG_LIGHT5_SPOTDIR_XY"}, + {0x197, "GPUREG_LIGHT5_SPOTDIR_Z"}, + + {0x199, "GPUREG_LIGHT5_CONFIG"}, + {0x19A, "GPUREG_LIGHT5_ATTENUATION_BIAS"}, + {0x19B, "GPUREG_LIGHT5_ATTENUATION_SCALE"}, + + {0x1A0, "GPUREG_LIGHT6_SPECULAR0"}, + {0x1A1, "GPUREG_LIGHT6_SPECULAR1"}, + {0x1A2, "GPUREG_LIGHT6_DIFFUSE"}, + {0x1A3, "GPUREG_LIGHT6_AMBIENT"}, + {0x1A4, "GPUREG_LIGHT6_XY"}, + {0x1A5, "GPUREG_LIGHT6_Z"}, + {0x1A6, "GPUREG_LIGHT6_SPOTDIR_XY"}, + {0x1A7, "GPUREG_LIGHT6_SPOTDIR_Z"}, + + {0x1A9, "GPUREG_LIGHT6_CONFIG"}, + {0x1AA, "GPUREG_LIGHT6_ATTENUATION_BIAS"}, + {0x1AB, "GPUREG_LIGHT6_ATTENUATION_SCALE"}, + + {0x1B0, "GPUREG_LIGHT7_SPECULAR0"}, + {0x1B1, "GPUREG_LIGHT7_SPECULAR1"}, + {0x1B2, "GPUREG_LIGHT7_DIFFUSE"}, + {0x1B3, "GPUREG_LIGHT7_AMBIENT"}, + {0x1B4, "GPUREG_LIGHT7_XY"}, + {0x1B5, "GPUREG_LIGHT7_Z"}, + {0x1B6, "GPUREG_LIGHT7_SPOTDIR_XY"}, + {0x1B7, "GPUREG_LIGHT7_SPOTDIR_Z"}, + + {0x1B9, "GPUREG_LIGHT7_CONFIG"}, + {0x1BA, "GPUREG_LIGHT7_ATTENUATION_BIAS"}, + {0x1BB, "GPUREG_LIGHT7_ATTENUATION_SCALE"}, + + {0x1C0, "GPUREG_LIGHTING_AMBIENT"}, + + {0x1C2, "GPUREG_LIGHTING_NUM_LIGHTS"}, + {0x1C3, "GPUREG_LIGHTING_CONFIG0"}, + {0x1C4, "GPUREG_LIGHTING_CONFIG1"}, + {0x1C5, "GPUREG_LIGHTING_LUT_INDEX"}, + {0x1C6, "GPUREG_LIGHTING_ENABLE1"}, + + {0x1C8, "GPUREG_LIGHTING_LUT_DATA0"}, + {0x1C9, "GPUREG_LIGHTING_LUT_DATA1"}, + {0x1CA, "GPUREG_LIGHTING_LUT_DATA2"}, + {0x1CB, "GPUREG_LIGHTING_LUT_DATA3"}, + {0x1CC, "GPUREG_LIGHTING_LUT_DATA4"}, + {0x1CD, "GPUREG_LIGHTING_LUT_DATA5"}, + {0x1CE, "GPUREG_LIGHTING_LUT_DATA6"}, + {0x1CF, "GPUREG_LIGHTING_LUT_DATA7"}, + {0x1D0, "GPUREG_LIGHTING_LUTINPUT_ABS"}, + {0x1D1, "GPUREG_LIGHTING_LUTINPUT_SELECT"}, + {0x1D2, "GPUREG_LIGHTING_LUTINPUT_SCALE"}, + + {0x1D9, "GPUREG_LIGHTING_LIGHT_PERMUTATION"}, + + {0x200, "GPUREG_ATTRIBBUFFERS_LOC"}, + {0x201, "GPUREG_ATTRIBBUFFERS_FORMAT_LOW"}, + {0x202, "GPUREG_ATTRIBBUFFERS_FORMAT_HIGH"}, + {0x203, "GPUREG_ATTRIBBUFFER0_OFFSET"}, + {0x204, "GPUREG_ATTRIBBUFFER0_CONFIG1"}, + {0x205, "GPUREG_ATTRIBBUFFER0_CONFIG2"}, + {0x206, "GPUREG_ATTRIBBUFFER1_OFFSET"}, + {0x207, "GPUREG_ATTRIBBUFFER1_CONFIG1"}, + {0x208, "GPUREG_ATTRIBBUFFER1_CONFIG2"}, + {0x209, "GPUREG_ATTRIBBUFFER2_OFFSET"}, + {0x20A, "GPUREG_ATTRIBBUFFER2_CONFIG1"}, + {0x20B, "GPUREG_ATTRIBBUFFER2_CONFIG2"}, + {0x20C, "GPUREG_ATTRIBBUFFER3_OFFSET"}, + {0x20D, "GPUREG_ATTRIBBUFFER3_CONFIG1"}, + {0x20E, "GPUREG_ATTRIBBUFFER3_CONFIG2"}, + {0x20F, "GPUREG_ATTRIBBUFFER4_OFFSET"}, + {0x210, "GPUREG_ATTRIBBUFFER4_CONFIG1"}, + {0x211, "GPUREG_ATTRIBBUFFER4_CONFIG2"}, + {0x212, "GPUREG_ATTRIBBUFFER5_OFFSET"}, + {0x213, "GPUREG_ATTRIBBUFFER5_CONFIG1"}, + {0x214, "GPUREG_ATTRIBBUFFER5_CONFIG2"}, + {0x215, "GPUREG_ATTRIBBUFFER6_OFFSET"}, + {0x216, "GPUREG_ATTRIBBUFFER6_CONFIG1"}, + {0x217, "GPUREG_ATTRIBBUFFER6_CONFIG2"}, + {0x218, "GPUREG_ATTRIBBUFFER7_OFFSET"}, + {0x219, "GPUREG_ATTRIBBUFFER7_CONFIG1"}, + {0x21A, "GPUREG_ATTRIBBUFFER7_CONFIG2"}, + {0x21B, "GPUREG_ATTRIBBUFFER8_OFFSET"}, + {0x21C, "GPUREG_ATTRIBBUFFER8_CONFIG1"}, + {0x21D, "GPUREG_ATTRIBBUFFER8_CONFIG2"}, + {0x21E, "GPUREG_ATTRIBBUFFER9_OFFSET"}, + {0x21F, "GPUREG_ATTRIBBUFFER9_CONFIG1"}, + {0x220, "GPUREG_ATTRIBBUFFER9_CONFIG2"}, + {0x221, "GPUREG_ATTRIBBUFFER10_OFFSET"}, + {0x222, "GPUREG_ATTRIBBUFFER10_CONFIG1"}, + {0x223, "GPUREG_ATTRIBBUFFER10_CONFIG2"}, + {0x224, "GPUREG_ATTRIBBUFFER11_OFFSET"}, + {0x225, "GPUREG_ATTRIBBUFFER11_CONFIG1"}, + {0x226, "GPUREG_ATTRIBBUFFER11_CONFIG2"}, + {0x227, "GPUREG_INDEXBUFFER_CONFIG"}, + {0x228, "GPUREG_NUMVERTICES"}, + {0x229, "GPUREG_GEOSTAGE_CONFIG"}, + {0x22A, "GPUREG_VERTEX_OFFSET"}, + + {0x22D, "GPUREG_POST_VERTEX_CACHE_NUM"}, + {0x22E, "GPUREG_DRAWARRAYS"}, + {0x22F, "GPUREG_DRAWELEMENTS"}, + + {0x231, "GPUREG_VTX_FUNC"}, + {0x232, "GPUREG_FIXEDATTRIB_INDEX"}, + {0x233, "GPUREG_FIXEDATTRIB_DATA0"}, + {0x234, "GPUREG_FIXEDATTRIB_DATA1"}, + {0x235, "GPUREG_FIXEDATTRIB_DATA2"}, + + {0x238, "GPUREG_CMDBUF_SIZE0"}, + {0x239, "GPUREG_CMDBUF_SIZE1"}, + {0x23A, "GPUREG_CMDBUF_ADDR0"}, + {0x23B, "GPUREG_CMDBUF_ADDR1"}, + {0x23C, "GPUREG_CMDBUF_JUMP0"}, + {0x23D, "GPUREG_CMDBUF_JUMP1"}, + + {0x242, "GPUREG_VSH_NUM_ATTR"}, + + {0x244, "GPUREG_VSH_COM_MODE"}, + {0x245, "GPUREG_START_DRAW_FUNC0"}, + + {0x24A, "GPUREG_VSH_OUTMAP_TOTAL1"}, + + {0x251, "GPUREG_VSH_OUTMAP_TOTAL2"}, + {0x252, "GPUREG_GSH_MISC0"}, + {0x253, "GPUREG_GEOSTAGE_CONFIG2"}, + {0x254, "GPUREG_GSH_MISC1"}, + + {0x25E, "GPUREG_PRIMITIVE_CONFIG"}, + {0x25F, "GPUREG_RESTART_PRIMITIVE"}, + + {0x280, "GPUREG_GSH_BOOLUNIFORM"}, + {0x281, "GPUREG_GSH_INTUNIFORM_I0"}, + {0x282, "GPUREG_GSH_INTUNIFORM_I1"}, + {0x283, "GPUREG_GSH_INTUNIFORM_I2"}, + {0x284, "GPUREG_GSH_INTUNIFORM_I3"}, + + {0x289, "GPUREG_GSH_INPUTBUFFER_CONFIG"}, + {0x28A, "GPUREG_GSH_ENTRYPOINT"}, + {0x28B, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW"}, + {0x28C, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_HIGH"}, + {0x28D, "GPUREG_GSH_OUTMAP_MASK"}, + + {0x28F, "GPUREG_GSH_CODETRANSFER_END"}, + {0x290, "GPUREG_GSH_FLOATUNIFORM_INDEX"}, + {0x291, "GPUREG_GSH_FLOATUNIFORM_DATA0"}, + {0x292, "GPUREG_GSH_FLOATUNIFORM_DATA1"}, + {0x293, "GPUREG_GSH_FLOATUNIFORM_DATA2"}, + {0x294, "GPUREG_GSH_FLOATUNIFORM_DATA3"}, + {0x295, "GPUREG_GSH_FLOATUNIFORM_DATA4"}, + {0x296, "GPUREG_GSH_FLOATUNIFORM_DATA5"}, + {0x297, "GPUREG_GSH_FLOATUNIFORM_DATA6"}, + {0x298, "GPUREG_GSH_FLOATUNIFORM_DATA7"}, + + {0x29B, "GPUREG_GSH_CODETRANSFER_INDEX"}, + {0x29C, "GPUREG_GSH_CODETRANSFER_DATA0"}, + {0x29D, "GPUREG_GSH_CODETRANSFER_DATA1"}, + {0x29E, "GPUREG_GSH_CODETRANSFER_DATA2"}, + {0x29F, "GPUREG_GSH_CODETRANSFER_DATA3"}, + {0x2A0, "GPUREG_GSH_CODETRANSFER_DATA4"}, + {0x2A1, "GPUREG_GSH_CODETRANSFER_DATA5"}, + {0x2A2, "GPUREG_GSH_CODETRANSFER_DATA6"}, + {0x2A3, "GPUREG_GSH_CODETRANSFER_DATA7"}, + + {0x2A5, "GPUREG_GSH_OPDESCS_INDEX"}, + {0x2A6, "GPUREG_GSH_OPDESCS_DATA0"}, + {0x2A7, "GPUREG_GSH_OPDESCS_DATA1"}, + {0x2A8, "GPUREG_GSH_OPDESCS_DATA2"}, + {0x2A9, "GPUREG_GSH_OPDESCS_DATA3"}, + {0x2AA, "GPUREG_GSH_OPDESCS_DATA4"}, + {0x2AB, "GPUREG_GSH_OPDESCS_DATA5"}, + {0x2AC, "GPUREG_GSH_OPDESCS_DATA6"}, + {0x2AD, "GPUREG_GSH_OPDESCS_DATA7"}, + + {0x2B0, "GPUREG_VSH_BOOLUNIFORM"}, + {0x2B1, "GPUREG_VSH_INTUNIFORM_I0"}, + {0x2B2, "GPUREG_VSH_INTUNIFORM_I1"}, + {0x2B3, "GPUREG_VSH_INTUNIFORM_I2"}, + {0x2B4, "GPUREG_VSH_INTUNIFORM_I3"}, + + {0x2B9, "GPUREG_VSH_INPUTBUFFER_CONFIG"}, + {0x2BA, "GPUREG_VSH_ENTRYPOINT"}, + {0x2BB, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW"}, + {0x2BC, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_HIGH"}, + {0x2BD, "GPUREG_VSH_OUTMAP_MASK"}, + + {0x2BF, "GPUREG_VSH_CODETRANSFER_END"}, + {0x2C0, "GPUREG_VSH_FLOATUNIFORM_INDEX"}, + {0x2C1, "GPUREG_VSH_FLOATUNIFORM_DATA0"}, + {0x2C2, "GPUREG_VSH_FLOATUNIFORM_DATA1"}, + {0x2C3, "GPUREG_VSH_FLOATUNIFORM_DATA2"}, + {0x2C4, "GPUREG_VSH_FLOATUNIFORM_DATA3"}, + {0x2C5, "GPUREG_VSH_FLOATUNIFORM_DATA4"}, + {0x2C6, "GPUREG_VSH_FLOATUNIFORM_DATA5"}, + {0x2C7, "GPUREG_VSH_FLOATUNIFORM_DATA6"}, + {0x2C8, "GPUREG_VSH_FLOATUNIFORM_DATA7"}, + + {0x2CB, "GPUREG_VSH_CODETRANSFER_INDEX"}, + {0x2CC, "GPUREG_VSH_CODETRANSFER_DATA0"}, + {0x2CD, "GPUREG_VSH_CODETRANSFER_DATA1"}, + {0x2CE, "GPUREG_VSH_CODETRANSFER_DATA2"}, + {0x2CF, "GPUREG_VSH_CODETRANSFER_DATA3"}, + {0x2D0, "GPUREG_VSH_CODETRANSFER_DATA4"}, + {0x2D1, "GPUREG_VSH_CODETRANSFER_DATA5"}, + {0x2D2, "GPUREG_VSH_CODETRANSFER_DATA6"}, + {0x2D3, "GPUREG_VSH_CODETRANSFER_DATA7"}, + + {0x2D5, "GPUREG_VSH_OPDESCS_INDEX"}, + {0x2D6, "GPUREG_VSH_OPDESCS_DATA0"}, + {0x2D7, "GPUREG_VSH_OPDESCS_DATA1"}, + {0x2D8, "GPUREG_VSH_OPDESCS_DATA2"}, + {0x2D9, "GPUREG_VSH_OPDESCS_DATA3"}, + {0x2DA, "GPUREG_VSH_OPDESCS_DATA4"}, + {0x2DB, "GPUREG_VSH_OPDESCS_DATA5"}, + {0x2DC, "GPUREG_VSH_OPDESCS_DATA6"}, + {0x2DD, "GPUREG_VSH_OPDESCS_DATA7"}, }; std::string Regs::GetCommandName(int index) { @@ -516,5 +515,4 @@ void State::Reset() { Zero(immediate); primitive_assembler.Reconfigure(Regs::TriangleTopology::List); } - } diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 7099c31a0..b2db609ec 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -16,15 +16,16 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" -#include "common/vector_math.h" #include "common/logging/log.h" +#include "common/vector_math.h" namespace Pica { // Returns index corresponding to the Regs member labeled by field_name // TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions // when used with array elements (e.g. PICA_REG_INDEX(vs_uniform_setup.set_value[1])). -// For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members +// For details cf. +// https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members // Hopefully, this will be fixed sometime in the future. // For lack of better alternatives, we currently hardcode the offsets when constant // expressions are needed via PICA_REG_INDEX_WORKAROUND (on sane compilers, static_asserts @@ -37,8 +38,9 @@ namespace Pica { // really is this annoying. This macro just forwards its first argument to PICA_REG_INDEX // and then performs a (no-op) cast to size_t iff the second argument matches the expected // field offset. Otherwise, the compiler will fail to compile this code. -#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ - ((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), size_t>::type)PICA_REG_INDEX(field_name)) +#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ + ((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), \ + size_t>::type)PICA_REG_INDEX(field_name)) #endif // _MSC_VER struct Regs { @@ -51,8 +53,8 @@ struct Regs { enum class CullMode : u32 { // Select which polygons are considered to be "frontfacing". - KeepAll = 0, - KeepClockWise = 1, + KeepAll = 0, + KeepClockWise = 1, KeepCounterClockWise = 2, // TODO: What does the third value imply? }; @@ -69,48 +71,47 @@ struct Regs { INSERT_PADDING_WORDS(0x9); - BitField<0, 24, u32> viewport_depth_range; // float24 + BitField<0, 24, u32> viewport_depth_range; // float24 BitField<0, 24, u32> viewport_depth_near_plane; // float24 BitField<0, 3, u32> vs_output_total; union VSOutputAttributes { // Maps components of output vertex attributes to semantics - enum Semantic : u32 - { - POSITION_X = 0, - POSITION_Y = 1, - POSITION_Z = 2, - POSITION_W = 3, - - QUATERNION_X = 4, - QUATERNION_Y = 5, - QUATERNION_Z = 6, - QUATERNION_W = 7, - - COLOR_R = 8, - COLOR_G = 9, - COLOR_B = 10, - COLOR_A = 11, - - TEXCOORD0_U = 12, - TEXCOORD0_V = 13, - TEXCOORD1_U = 14, - TEXCOORD1_V = 15, + enum Semantic : u32 { + POSITION_X = 0, + POSITION_Y = 1, + POSITION_Z = 2, + POSITION_W = 3, + + QUATERNION_X = 4, + QUATERNION_Y = 5, + QUATERNION_Z = 6, + QUATERNION_W = 7, + + COLOR_R = 8, + COLOR_G = 9, + COLOR_B = 10, + COLOR_A = 11, + + TEXCOORD0_U = 12, + TEXCOORD0_V = 13, + TEXCOORD1_U = 14, + TEXCOORD1_V = 15, // TODO: Not verified - VIEW_X = 18, - VIEW_Y = 19, - VIEW_Z = 20, + VIEW_X = 18, + VIEW_Y = 19, + VIEW_Z = 20, - TEXCOORD2_U = 22, - TEXCOORD2_V = 23, + TEXCOORD2_U = 22, + TEXCOORD2_V = 23, - INVALID = 31, + INVALID = 31, }; - BitField< 0, 5, Semantic> map_x; - BitField< 8, 5, Semantic> map_y; + BitField<0, 5, Semantic> map_x; + BitField<8, 5, Semantic> map_y; BitField<16, 5, Semantic> map_z; BitField<24, 5, Semantic> map_w; } vs_output_attributes[7]; @@ -128,77 +129,78 @@ struct Regs { BitField<0, 2, ScissorMode> mode; union { - BitField< 0, 16, u32> x1; + BitField<0, 16, u32> x1; BitField<16, 16, u32> y1; }; union { - BitField< 0, 16, u32> x2; + BitField<0, 16, u32> x2; BitField<16, 16, u32> y2; }; } scissor_test; union { - BitField< 0, 10, s32> x; + BitField<0, 10, s32> x; BitField<16, 10, s32> y; } viewport_corner; INSERT_PADDING_WORDS(0x1); - //TODO: early depth + // TODO: early depth INSERT_PADDING_WORDS(0x1); INSERT_PADDING_WORDS(0x2); enum DepthBuffering : u32 { - WBuffering = 0, - ZBuffering = 1, + WBuffering = 0, + ZBuffering = 1, }; - BitField< 0, 1, DepthBuffering> depthmap_enable; + BitField<0, 1, DepthBuffering> depthmap_enable; INSERT_PADDING_WORDS(0x12); struct TextureConfig { enum TextureType : u32 { - Texture2D = 0, - TextureCube = 1, - Shadow2D = 2, + Texture2D = 0, + TextureCube = 1, + Shadow2D = 2, Projection2D = 3, - ShadowCube = 4, - Disabled = 5, + ShadowCube = 4, + Disabled = 5, }; enum WrapMode : u32 { - ClampToEdge = 0, - ClampToBorder = 1, - Repeat = 2, + ClampToEdge = 0, + ClampToBorder = 1, + Repeat = 2, MirroredRepeat = 3, }; enum TextureFilter : u32 { Nearest = 0, - Linear = 1 + Linear = 1, }; union { u32 raw; - BitField< 0, 8, u32> r; - BitField< 8, 8, u32> g; + BitField<0, 8, u32> r; + BitField<8, 8, u32> g; BitField<16, 8, u32> b; BitField<24, 8, u32> a; } border_color; union { - BitField< 0, 16, u32> height; + BitField<0, 16, u32> height; BitField<16, 16, u32> width; }; union { - BitField< 1, 1, TextureFilter> mag_filter; - BitField< 2, 1, TextureFilter> min_filter; - BitField< 8, 2, WrapMode> wrap_t; + BitField<1, 1, TextureFilter> mag_filter; + BitField<2, 1, TextureFilter> min_filter; + BitField<8, 2, WrapMode> wrap_t; BitField<12, 2, WrapMode> wrap_s; - BitField<28, 2, TextureType> type; ///< @note Only valid for texture 0 according to 3DBrew. + BitField<28, 2, TextureType> + type; ///< @note Only valid for texture 0 according to 3DBrew. }; INSERT_PADDING_WORDS(0x1); @@ -216,39 +218,39 @@ struct Regs { }; enum class TextureFormat : u32 { - RGBA8 = 0, - RGB8 = 1, - RGB5A1 = 2, - RGB565 = 3, - RGBA4 = 4, - IA8 = 5, - RG8 = 6, ///< @note Also called HILO8 in 3DBrew. - I8 = 7, - A8 = 8, - IA4 = 9, - I4 = 10, - A4 = 11, - ETC1 = 12, // compressed - ETC1A4 = 13, // compressed + RGBA8 = 0, + RGB8 = 1, + RGB5A1 = 2, + RGB565 = 3, + RGBA4 = 4, + IA8 = 5, + RG8 = 6, ///< @note Also called HILO8 in 3DBrew. + I8 = 7, + A8 = 8, + IA4 = 9, + I4 = 10, + A4 = 11, + ETC1 = 12, // compressed + ETC1A4 = 13, // compressed }; enum class LogicOp : u32 { - Clear = 0, - And = 1, - AndReverse = 2, - Copy = 3, - Set = 4, - CopyInverted = 5, - NoOp = 6, - Invert = 7, - Nand = 8, - Or = 9, - Nor = 10, - Xor = 11, - Equiv = 12, - AndInverted = 13, - OrReverse = 14, - OrInverted = 15, + Clear = 0, + And = 1, + AndReverse = 2, + Copy = 3, + Set = 4, + CopyInverted = 5, + NoOp = 6, + Invert = 7, + Nand = 8, + Or = 9, + Nor = 10, + Xor = 11, + Equiv = 12, + AndInverted = 13, + OrReverse = 14, + OrInverted = 15, }; static unsigned NibblesPerPixel(TextureFormat format) { @@ -273,15 +275,15 @@ struct Regs { case TextureFormat::I8: case TextureFormat::A8: case TextureFormat::IA4: - default: // placeholder for yet unknown formats + default: // placeholder for yet unknown formats return 2; } } union { - BitField< 0, 1, u32> texture0_enable; - BitField< 1, 1, u32> texture1_enable; - BitField< 2, 1, u32> texture2_enable; + BitField<0, 1, u32> texture0_enable; + BitField<1, 1, u32> texture1_enable; + BitField<2, 1, u32> texture2_enable; }; TextureConfig texture0; INSERT_PADDING_WORDS(0x8); @@ -302,63 +304,63 @@ struct Regs { }; const std::array<FullTextureConfig, 3> GetTextures() const { return {{ - { texture0_enable.ToBool(), texture0, texture0_format }, - { texture1_enable.ToBool(), texture1, texture1_format }, - { texture2_enable.ToBool(), texture2, texture2_format } - }}; + {texture0_enable.ToBool(), texture0, texture0_format}, + {texture1_enable.ToBool(), texture1, texture1_format}, + {texture2_enable.ToBool(), texture2, texture2_format}, + }}; } // 0xc0-0xff: Texture Combiner (akin to glTexEnv) struct TevStageConfig { enum class Source : u32 { - PrimaryColor = 0x0, - PrimaryFragmentColor = 0x1, + PrimaryColor = 0x0, + PrimaryFragmentColor = 0x1, SecondaryFragmentColor = 0x2, - Texture0 = 0x3, - Texture1 = 0x4, - Texture2 = 0x5, - Texture3 = 0x6, + Texture0 = 0x3, + Texture1 = 0x4, + Texture2 = 0x5, + Texture3 = 0x6, - PreviousBuffer = 0xd, - Constant = 0xe, - Previous = 0xf, + PreviousBuffer = 0xd, + Constant = 0xe, + Previous = 0xf, }; enum class ColorModifier : u32 { - SourceColor = 0x0, + SourceColor = 0x0, OneMinusSourceColor = 0x1, - SourceAlpha = 0x2, + SourceAlpha = 0x2, OneMinusSourceAlpha = 0x3, - SourceRed = 0x4, - OneMinusSourceRed = 0x5, + SourceRed = 0x4, + OneMinusSourceRed = 0x5, - SourceGreen = 0x8, + SourceGreen = 0x8, OneMinusSourceGreen = 0x9, - SourceBlue = 0xc, - OneMinusSourceBlue = 0xd, + SourceBlue = 0xc, + OneMinusSourceBlue = 0xd, }; enum class AlphaModifier : u32 { - SourceAlpha = 0x0, + SourceAlpha = 0x0, OneMinusSourceAlpha = 0x1, - SourceRed = 0x2, - OneMinusSourceRed = 0x3, - SourceGreen = 0x4, + SourceRed = 0x2, + OneMinusSourceRed = 0x3, + SourceGreen = 0x4, OneMinusSourceGreen = 0x5, - SourceBlue = 0x6, - OneMinusSourceBlue = 0x7, + SourceBlue = 0x6, + OneMinusSourceBlue = 0x7, }; enum class Operation : u32 { - Replace = 0, - Modulate = 1, - Add = 2, - AddSigned = 3, - Lerp = 4, - Subtract = 5, - Dot3_RGB = 6, + Replace = 0, + Modulate = 1, + Add = 2, + AddSigned = 3, + Lerp = 4, + Subtract = 5, + Dot3_RGB = 6, MultiplyThenAdd = 8, AddThenMultiply = 9, @@ -366,9 +368,9 @@ struct Regs { union { u32 sources_raw; - BitField< 0, 4, Source> color_source1; - BitField< 4, 4, Source> color_source2; - BitField< 8, 4, Source> color_source3; + BitField<0, 4, Source> color_source1; + BitField<4, 4, Source> color_source2; + BitField<8, 4, Source> color_source3; BitField<16, 4, Source> alpha_source1; BitField<20, 4, Source> alpha_source2; BitField<24, 4, Source> alpha_source3; @@ -376,9 +378,9 @@ struct Regs { union { u32 modifiers_raw; - BitField< 0, 4, ColorModifier> color_modifier1; - BitField< 4, 4, ColorModifier> color_modifier2; - BitField< 8, 4, ColorModifier> color_modifier3; + BitField<0, 4, ColorModifier> color_modifier1; + BitField<4, 4, ColorModifier> color_modifier2; + BitField<8, 4, ColorModifier> color_modifier3; BitField<12, 3, AlphaModifier> alpha_modifier1; BitField<16, 3, AlphaModifier> alpha_modifier2; BitField<20, 3, AlphaModifier> alpha_modifier3; @@ -386,21 +388,21 @@ struct Regs { union { u32 ops_raw; - BitField< 0, 4, Operation> color_op; + BitField<0, 4, Operation> color_op; BitField<16, 4, Operation> alpha_op; }; union { u32 const_color; - BitField< 0, 8, u32> const_r; - BitField< 8, 8, u32> const_g; + BitField<0, 8, u32> const_r; + BitField<8, 8, u32> const_g; BitField<16, 8, u32> const_b; BitField<24, 8, u32> const_a; }; union { u32 scales_raw; - BitField< 0, 2, u32> color_scale; + BitField<0, 2, u32> color_scale; BitField<16, 2, u32> alpha_scale; }; @@ -424,8 +426,8 @@ struct Regs { enum class FogMode : u32 { None = 0, - Fog = 5, - Gas = 7, + Fog = 5, + Gas = 7, }; union { @@ -435,7 +437,7 @@ struct Regs { union { // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in // these masks are set - BitField< 8, 4, u32> update_mask_rgb; + BitField<8, 4, u32> update_mask_rgb; BitField<12, 4, u32> update_mask_a; bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { @@ -450,8 +452,8 @@ struct Regs { union { u32 raw; - BitField< 0, 8, u32> r; - BitField< 8, 8, u32> g; + BitField<0, 8, u32> r; + BitField<8, 8, u32> g; BitField<16, 8, u32> b; } fog_color; @@ -469,66 +471,64 @@ struct Regs { union { u32 raw; - BitField< 0, 8, u32> r; - BitField< 8, 8, u32> g; + BitField<0, 8, u32> r; + BitField<8, 8, u32> g; BitField<16, 8, u32> b; BitField<24, 8, u32> a; } tev_combiner_buffer_color; INSERT_PADDING_WORDS(0x2); - const std::array<Regs::TevStageConfig,6> GetTevStages() const { - return {{ tev_stage0, tev_stage1, - tev_stage2, tev_stage3, - tev_stage4, tev_stage5 }}; + const std::array<Regs::TevStageConfig, 6> GetTevStages() const { + return {{tev_stage0, tev_stage1, tev_stage2, tev_stage3, tev_stage4, tev_stage5}}; }; enum class BlendEquation : u32 { - Add = 0, - Subtract = 1, + Add = 0, + Subtract = 1, ReverseSubtract = 2, - Min = 3, - Max = 4, + Min = 3, + Max = 4, }; enum class BlendFactor : u32 { - Zero = 0, - One = 1, - SourceColor = 2, - OneMinusSourceColor = 3, - DestColor = 4, - OneMinusDestColor = 5, - SourceAlpha = 6, - OneMinusSourceAlpha = 7, - DestAlpha = 8, - OneMinusDestAlpha = 9, - ConstantColor = 10, - OneMinusConstantColor = 11, - ConstantAlpha = 12, - OneMinusConstantAlpha = 13, - SourceAlphaSaturate = 14, + Zero = 0, + One = 1, + SourceColor = 2, + OneMinusSourceColor = 3, + DestColor = 4, + OneMinusDestColor = 5, + SourceAlpha = 6, + OneMinusSourceAlpha = 7, + DestAlpha = 8, + OneMinusDestAlpha = 9, + ConstantColor = 10, + OneMinusConstantColor = 11, + ConstantAlpha = 12, + OneMinusConstantAlpha = 13, + SourceAlphaSaturate = 14, }; enum class CompareFunc : u32 { - Never = 0, - Always = 1, - Equal = 2, - NotEqual = 3, - LessThan = 4, - LessThanOrEqual = 5, - GreaterThan = 6, + Never = 0, + Always = 1, + Equal = 2, + NotEqual = 3, + LessThan = 4, + LessThanOrEqual = 5, + GreaterThan = 6, GreaterThanOrEqual = 7, }; enum class StencilAction : u32 { - Keep = 0, - Zero = 1, - Replace = 2, - Increment = 3, - Decrement = 4, - Invert = 5, - IncrementWrap = 6, - DecrementWrap = 7 + Keep = 0, + Zero = 1, + Replace = 2, + Increment = 3, + Decrement = 4, + Invert = 5, + IncrementWrap = 6, + DecrementWrap = 7, }; struct { @@ -538,8 +538,8 @@ struct Regs { }; union { - BitField< 0, 8, BlendEquation> blend_equation_rgb; - BitField< 8, 8, BlendEquation> blend_equation_a; + BitField<0, 8, BlendEquation> blend_equation_rgb; + BitField<8, 8, BlendEquation> blend_equation_a; BitField<16, 4, BlendFactor> factor_source_rgb; BitField<20, 4, BlendFactor> factor_dest_rgb; @@ -554,16 +554,16 @@ struct Regs { union { u32 raw; - BitField< 0, 8, u32> r; - BitField< 8, 8, u32> g; + BitField<0, 8, u32> r; + BitField<8, 8, u32> g; BitField<16, 8, u32> b; BitField<24, 8, u32> a; } blend_const; union { - BitField< 0, 1, u32> enable; - BitField< 4, 3, CompareFunc> func; - BitField< 8, 8, u32> ref; + BitField<0, 1, u32> enable; + BitField<4, 3, CompareFunc> func; + BitField<8, 8, u32> ref; } alpha_test; struct { @@ -572,13 +572,13 @@ struct Regs { u32 raw_func; // If true, enable stencil testing - BitField< 0, 1, u32> enable; + BitField<0, 1, u32> enable; // Comparison operation for stencil testing - BitField< 4, 3, CompareFunc> func; + BitField<4, 3, CompareFunc> func; // Mask used to control writing to the stencil buffer - BitField< 8, 8, u32> write_mask; + BitField<8, 8, u32> write_mask; // Value to compare against for stencil testing BitField<16, 8, u32> reference_value; @@ -592,21 +592,21 @@ struct Regs { u32 raw_op; // Action to perform when the stencil test fails - BitField< 0, 3, StencilAction> action_stencil_fail; + BitField<0, 3, StencilAction> action_stencil_fail; // Action to perform when stencil testing passed but depth testing fails - BitField< 4, 3, StencilAction> action_depth_fail; + BitField<4, 3, StencilAction> action_depth_fail; // Action to perform when both stencil and depth testing pass - BitField< 8, 3, StencilAction> action_depth_pass; + BitField<8, 3, StencilAction> action_depth_pass; }; } stencil_test; union { - BitField< 0, 1, u32> depth_test_enable; - BitField< 4, 3, CompareFunc> depth_test_func; - BitField< 8, 1, u32> red_enable; - BitField< 9, 1, u32> green_enable; + BitField<0, 1, u32> depth_test_enable; + BitField<4, 3, CompareFunc> depth_test_func; + BitField<8, 1, u32> red_enable; + BitField<9, 1, u32> green_enable; BitField<10, 1, u32> blue_enable; BitField<11, 1, u32> alpha_enable; BitField<12, 1, u32> depth_write_enable; @@ -617,16 +617,16 @@ struct Regs { // Components are laid out in reverse byte order, most significant bits first. enum class ColorFormat : u32 { - RGBA8 = 0, - RGB8 = 1, + RGBA8 = 0, + RGB8 = 1, RGB5A1 = 2, RGB565 = 3, - RGBA4 = 4, + RGBA4 = 4, }; enum class DepthFormat : u32 { - D16 = 0, - D24 = 2, + D16 = 0, + D24 = 2, D24S8 = 3, }; @@ -673,7 +673,7 @@ struct Regs { // while the height is stored as the actual height minus one. // Hence, don't access these fields directly but use the accessors // GetWidth() and GetHeight() instead. - BitField< 0, 11, u32> width; + BitField<0, 11, u32> width; BitField<12, 10, u32> height; }; @@ -759,10 +759,12 @@ struct Regs { /// Selects which lighting components are affected by fresnel enum class LightingFresnelSelector { - None = 0, ///< Fresnel is disabled - PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel - SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel - Both = PrimaryAlpha | SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel + None = 0, ///< Fresnel is disabled + PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel + SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel + Both = + PrimaryAlpha | + SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel }; /// Factor used to scale the output of a lighting LUT @@ -789,57 +791,63 @@ struct Regs { }; union LightColor { - BitField< 0, 10, u32> b; + BitField<0, 10, u32> b; BitField<10, 10, u32> g; BitField<20, 10, u32> r; Math::Vec3f ToVec3f() const { - // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color component + // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color + // component return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f); } }; - /// Returns true if the specified lighting sampler is supported by the current Pica lighting configuration + /// Returns true if the specified lighting sampler is supported by the current Pica lighting + /// configuration static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) { switch (sampler) { case LightingSampler::Distribution0: return (config != LightingConfig::Config1); case LightingSampler::Distribution1: - return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5); + return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && + (config != LightingConfig::Config5); case LightingSampler::Fresnel: - return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4); + return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && + (config != LightingConfig::Config4); case LightingSampler::ReflectRed: return (config != LightingConfig::Config3); case LightingSampler::ReflectGreen: case LightingSampler::ReflectBlue: - return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7); + return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || + (config == LightingConfig::Config7); default: UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached " "unreachable section, sampler should be one " "of Distribution0, Distribution1, Fresnel, " "ReflectRed, ReflectGreen or ReflectBlue, instead " - "got %i", static_cast<int>(config)); + "got %i", + static_cast<int>(config)); } } struct { struct LightSrc { - LightColor specular_0; // material.specular_0 * light.specular_0 - LightColor specular_1; // material.specular_1 * light.specular_1 - LightColor diffuse; // material.diffuse * light.diffuse - LightColor ambient; // material.ambient * light.ambient + LightColor specular_0; // material.specular_0 * light.specular_0 + LightColor specular_1; // material.specular_1 * light.specular_1 + LightColor diffuse; // material.diffuse * light.diffuse + LightColor ambient; // material.ambient * light.ambient // Encoded as 16-bit floating point union { - BitField< 0, 16, u32> x; + BitField<0, 16, u32> x; BitField<16, 16, u32> y; }; union { - BitField< 0, 16, u32> z; + BitField<0, 16, u32> z; }; INSERT_PADDING_WORDS(0x3); @@ -854,7 +862,8 @@ struct Regs { INSERT_PADDING_WORDS(0x4); }; - static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words"); + static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), + "LightSrc structure must be 0x10 words"); LightSrc light[8]; LightColor global_ambient; // Emission + (material.ambient * lighting.ambient) @@ -862,8 +871,8 @@ struct Regs { BitField<0, 3, u32> num_lights; // Number of enabled lights - 1 union { - BitField< 2, 2, LightingFresnelSelector> fresnel_selector; - BitField< 4, 4, LightingConfig> config; + BitField<2, 2, LightingFresnelSelector> fresnel_selector; + BitField<4, 4, LightingConfig> config; BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2 BitField<27, 1, u32> clamp_highlights; BitField<28, 2, LightingBumpMode> bump_mode; @@ -892,16 +901,17 @@ struct Regs { } config1; bool IsDistAttenDisabled(unsigned index) const { - const unsigned disable[] = { config1.disable_dist_atten_light_0, config1.disable_dist_atten_light_1, - config1.disable_dist_atten_light_2, config1.disable_dist_atten_light_3, - config1.disable_dist_atten_light_4, config1.disable_dist_atten_light_5, - config1.disable_dist_atten_light_6, config1.disable_dist_atten_light_7 }; + const unsigned disable[] = { + config1.disable_dist_atten_light_0, config1.disable_dist_atten_light_1, + config1.disable_dist_atten_light_2, config1.disable_dist_atten_light_3, + config1.disable_dist_atten_light_4, config1.disable_dist_atten_light_5, + config1.disable_dist_atten_light_6, config1.disable_dist_atten_light_7}; return disable[index] != 0; } union { - BitField<0, 8, u32> index; ///< Index at which to set data in the LUT - BitField<8, 5, u32> type; ///< Type of LUT for which to set data + BitField<0, 8, u32> index; ///< Index at which to set data in the LUT + BitField<8, 5, u32> type; ///< Type of LUT for which to set data } lut_config; BitField<0, 1, u32> disable; @@ -917,9 +927,9 @@ struct Regs { // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in // the range of (0.0, 1.0). union { - BitField< 1, 1, u32> disable_d0; - BitField< 5, 1, u32> disable_d1; - BitField< 9, 1, u32> disable_sp; + BitField<1, 1, u32> disable_d0; + BitField<5, 1, u32> disable_d1; + BitField<9, 1, u32> disable_sp; BitField<13, 1, u32> disable_fr; BitField<17, 1, u32> disable_rb; BitField<21, 1, u32> disable_rg; @@ -927,9 +937,9 @@ struct Regs { } abs_lut_input; union { - BitField< 0, 3, LightingLutInput> d0; - BitField< 4, 3, LightingLutInput> d1; - BitField< 8, 3, LightingLutInput> sp; + BitField<0, 3, LightingLutInput> d0; + BitField<4, 3, LightingLutInput> d1; + BitField<8, 3, LightingLutInput> sp; BitField<12, 3, LightingLutInput> fr; BitField<16, 3, LightingLutInput> rb; BitField<20, 3, LightingLutInput> rg; @@ -937,9 +947,9 @@ struct Regs { } lut_input; union { - BitField< 0, 3, LightingScale> d0; - BitField< 4, 3, LightingScale> d1; - BitField< 8, 3, LightingScale> sp; + BitField<0, 3, LightingScale> d0; + BitField<4, 3, LightingScale> d1; + BitField<8, 3, LightingScale> sp; BitField<12, 3, LightingScale> fr; BitField<16, 3, LightingScale> rb; BitField<20, 3, LightingScale> rg; @@ -972,9 +982,9 @@ struct Regs { // above), the first N slots below will be set to integers within the range of 0-7, // corresponding to the actual light that is enabled for each slot. - BitField< 0, 3, u32> slot_0; - BitField< 4, 3, u32> slot_1; - BitField< 8, 3, u32> slot_2; + BitField<0, 3, u32> slot_0; + BitField<4, 3, u32> slot_1; + BitField<8, 3, u32> slot_2; BitField<12, 3, u32> slot_3; BitField<16, 3, u32> slot_4; BitField<20, 3, u32> slot_5; @@ -982,7 +992,8 @@ struct Regs { BitField<28, 3, u32> slot_7; unsigned GetNum(unsigned index) const { - const unsigned enable_slots[] = { slot_0, slot_1, slot_2, slot_3, slot_4, slot_5, slot_6, slot_7 }; + const unsigned enable_slots[] = {slot_0, slot_1, slot_2, slot_3, + slot_4, slot_5, slot_6, slot_7}; return enable_slots[index]; } } light_enable; @@ -1006,58 +1017,54 @@ struct Regs { // Descriptor for internal vertex attributes union { - BitField< 0, 2, VertexAttributeFormat> format0; // size of one element - BitField< 2, 2, u64> size0; // number of elements minus 1 - BitField< 4, 2, VertexAttributeFormat> format1; - BitField< 6, 2, u64> size1; - BitField< 8, 2, VertexAttributeFormat> format2; - BitField<10, 2, u64> size2; - BitField<12, 2, VertexAttributeFormat> format3; - BitField<14, 2, u64> size3; - BitField<16, 2, VertexAttributeFormat> format4; - BitField<18, 2, u64> size4; - BitField<20, 2, VertexAttributeFormat> format5; - BitField<22, 2, u64> size5; - BitField<24, 2, VertexAttributeFormat> format6; - BitField<26, 2, u64> size6; - BitField<28, 2, VertexAttributeFormat> format7; - BitField<30, 2, u64> size7; - BitField<32, 2, VertexAttributeFormat> format8; - BitField<34, 2, u64> size8; - BitField<36, 2, VertexAttributeFormat> format9; - BitField<38, 2, u64> size9; - BitField<40, 2, VertexAttributeFormat> format10; - BitField<42, 2, u64> size10; - BitField<44, 2, VertexAttributeFormat> format11; - BitField<46, 2, u64> size11; + BitField<0, 2, VertexAttributeFormat> format0; // size of one element + BitField<2, 2, u64> size0; // number of elements minus 1 + BitField<4, 2, VertexAttributeFormat> format1; + BitField<6, 2, u64> size1; + BitField<8, 2, VertexAttributeFormat> format2; + BitField<10, 2, u64> size2; + BitField<12, 2, VertexAttributeFormat> format3; + BitField<14, 2, u64> size3; + BitField<16, 2, VertexAttributeFormat> format4; + BitField<18, 2, u64> size4; + BitField<20, 2, VertexAttributeFormat> format5; + BitField<22, 2, u64> size5; + BitField<24, 2, VertexAttributeFormat> format6; + BitField<26, 2, u64> size6; + BitField<28, 2, VertexAttributeFormat> format7; + BitField<30, 2, u64> size7; + BitField<32, 2, VertexAttributeFormat> format8; + BitField<34, 2, u64> size8; + BitField<36, 2, VertexAttributeFormat> format9; + BitField<38, 2, u64> size9; + BitField<40, 2, VertexAttributeFormat> format10; + BitField<42, 2, u64> size10; + BitField<44, 2, VertexAttributeFormat> format11; + BitField<46, 2, u64> size11; BitField<48, 12, u64> attribute_mask; // number of total attributes minus 1 - BitField<60, 4, u64> num_extra_attributes; + BitField<60, 4, u64> num_extra_attributes; }; inline VertexAttributeFormat GetFormat(int n) const { - VertexAttributeFormat formats[] = { - format0, format1, format2, format3, - format4, format5, format6, format7, - format8, format9, format10, format11 - }; + VertexAttributeFormat formats[] = {format0, format1, format2, format3, + format4, format5, format6, format7, + format8, format9, format10, format11}; return formats[n]; } inline int GetNumElements(int n) const { - u64 sizes[] = { - size0, size1, size2, size3, - size4, size5, size6, size7, - size8, size9, size10, size11 - }; - return (int)sizes[n]+1; + u64 sizes[] = {size0, size1, size2, size3, size4, size5, + size6, size7, size8, size9, size10, size11}; + return (int)sizes[n] + 1; } inline int GetElementSizeInBytes(int n) const { - return (GetFormat(n) == VertexAttributeFormat::FLOAT) ? 4 : - (GetFormat(n) == VertexAttributeFormat::SHORT) ? 2 : 1; + return (GetFormat(n) == VertexAttributeFormat::FLOAT) + ? 4 + : (GetFormat(n) == VertexAttributeFormat::SHORT) ? 2 : 1; } inline int GetStride(int n) const { @@ -1069,7 +1076,7 @@ struct Regs { } inline int GetNumTotalAttributes() const { - return (int)num_extra_attributes+1; + return (int)num_extra_attributes + 1; } // Attribute loaders map the source vertex data to input attributes @@ -1079,9 +1086,9 @@ struct Regs { u32 data_offset; union { - BitField< 0, 4, u64> comp0; - BitField< 4, 4, u64> comp1; - BitField< 8, 4, u64> comp2; + BitField<0, 4, u64> comp0; + BitField<4, 4, u64> comp1; + BitField<8, 4, u64> comp2; BitField<12, 4, u64> comp3; BitField<16, 4, u64> comp4; BitField<20, 4, u64> comp5; @@ -1099,11 +1106,8 @@ struct Regs { }; inline int GetComponent(int n) const { - u64 components[] = { - comp0, comp1, comp2, comp3, - comp4, comp5, comp6, comp7, - comp8, comp9, comp10, comp11 - }; + u64 components[] = {comp0, comp1, comp2, comp3, comp4, comp5, + comp6, comp7, comp8, comp9, comp10, comp11}; return (int)components[n]; } } attribute_loaders[12]; @@ -1157,8 +1161,8 @@ struct Regs { // kicked off. // 2) Games can configure these registers to provide a command list subroutine mechanism. - BitField< 0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer - BitField< 0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer + BitField<0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer + BitField<0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to unsigned GetSize(unsigned index) const { @@ -1176,7 +1180,7 @@ struct Regs { enum class GPUMode : u32 { Drawing = 0, - Configuring = 1 + Configuring = 1, }; GPUMode gpu_mode; @@ -1184,9 +1188,9 @@ struct Regs { INSERT_PADDING_WORDS(0x18); enum class TriangleTopology : u32 { - List = 0, - Strip = 1, - Fan = 2, + List = 0, + Strip = 1, + Fan = 2, Shader = 3, // Programmable setup unit implemented in a geometry shader }; @@ -1200,8 +1204,8 @@ struct Regs { BitField<0, 16, u32> bool_uniforms; union { - BitField< 0, 8, u32> x; - BitField< 8, 8, u32> y; + BitField<0, 8, u32> x; + BitField<8, 8, u32> y; BitField<16, 8, u32> z; BitField<24, 8, u32> w; } int_uniforms[4]; @@ -1217,9 +1221,9 @@ struct Regs { BitField<0, 16, u32> main_offset; union { - BitField< 0, 4, u64> attribute0_register; - BitField< 4, 4, u64> attribute1_register; - BitField< 8, 4, u64> attribute2_register; + BitField<0, 4, u64> attribute0_register; + BitField<4, 4, u64> attribute1_register; + BitField<8, 4, u64> attribute2_register; BitField<12, 4, u64> attribute3_register; BitField<16, 4, u64> attribute4_register; BitField<20, 4, u64> attribute5_register; @@ -1236,10 +1240,12 @@ struct Regs { int GetRegisterForAttribute(int attribute_index) const { u64 fields[] = { - attribute0_register, attribute1_register, attribute2_register, attribute3_register, - attribute4_register, attribute5_register, attribute6_register, attribute7_register, - attribute8_register, attribute9_register, attribute10_register, attribute11_register, - attribute12_register, attribute13_register, attribute14_register, attribute15_register, + attribute0_register, attribute1_register, attribute2_register, + attribute3_register, attribute4_register, attribute5_register, + attribute6_register, attribute7_register, attribute8_register, + attribute9_register, attribute10_register, attribute11_register, + attribute12_register, attribute13_register, attribute14_register, + attribute15_register, }; return (int)fields[attribute_index]; } @@ -1251,10 +1257,9 @@ struct Regs { INSERT_PADDING_WORDS(0x2); struct { - enum Format : u32 - { + enum Format : u32 { FLOAT24 = 0, - FLOAT32 = 1 + FLOAT32 = 1, }; bool IsFloat32() const { @@ -1263,7 +1268,8 @@ struct Regs { union { // Index of the next uniform to write to - // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices + // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid + // indices // TODO: Maybe the uppermost index is for the geometry shader? Investigate! BitField<0, 7, u32> index; @@ -1315,12 +1321,12 @@ struct Regs { return sizeof(Regs) / sizeof(u32); } - const u32& operator [] (int index) const { + const u32& operator[](int index) const { const u32* content = reinterpret_cast<const u32*>(this); return content[index]; } - u32& operator [] (int index) { + u32& operator[](int index) { u32* content = reinterpret_cast<u32*>(this); return content[index]; } @@ -1339,7 +1345,9 @@ private: // is technically allowed since C++11. This macro should be enabled once MSVC adds // support for that. #ifndef _MSC_VER -#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position") +#define ASSERT_REG_POSITION(field_name, position) \ + static_assert(offsetof(Regs, field_name) == position * 4, \ + "Field " #field_name " has invalid position") ASSERT_REG_POSITION(trigger_irq, 0x10); ASSERT_REG_POSITION(cull_mode, 0x40); @@ -1392,11 +1400,15 @@ ASSERT_REG_POSITION(vs, 0x2b0); #undef ASSERT_REG_POSITION #endif // !defined(_MSC_VER) -static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig structure has incorrect size"); +static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), + "ShaderConfig structure has incorrect size"); -// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. -static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); -static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); +// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value +// anyway. +static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), + "Register set structure larger than it should be"); +static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), + "Register set structure smaller than it should be"); /// Initialize Pica state void Init(); diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index 01f4285a8..e4f2e6d5d 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h @@ -5,10 +5,8 @@ #pragma once #include <array> - #include "common/bit_field.h" #include "common/common_types.h" - #include "video_core/pica.h" #include "video_core/primitive_assembly.h" #include "video_core/shader/shader.h" @@ -33,7 +31,7 @@ struct State { u32 raw; // LUT value, encoded as 12-bit fixed point, with 12 fraction bits - BitField< 0, 12, u32> value; // 0.0.12 fixed point + BitField<0, 12, u32> value; // 0.0.12 fixed point // Used by HW for efficient interpolation, Citra does not use these BitField<12, 12, s32> difference; // 1.0.11 fixed point @@ -51,8 +49,8 @@ struct State { // Used for raw access u32 raw; - BitField< 0, 13, s32> difference; // 1.1.11 fixed point - BitField<13, 11, u32> value; // 0.0.11 fixed point + BitField<0, 13, s32> difference; // 1.1.11 fixed point + BitField<13, 11, u32> value; // 0.0.11 fixed point }; std::array<LutEntry, 128> lut; diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h index 3b7bfbdca..5d7e10066 100644 --- a/src/video_core/pica_types.h +++ b/src/video_core/pica_types.h @@ -6,7 +6,6 @@ #include <cmath> #include <cstring> - #include "common/common_types.h" namespace Pica { @@ -22,7 +21,7 @@ namespace Pica { * * @todo Verify on HW if this conversion is sufficiently accurate. */ -template<unsigned M, unsigned E> +template <unsigned M, unsigned E> struct Float { public: static Float<M, E> FromFloat32(float val) { @@ -58,7 +57,7 @@ public: return value; } - Float<M, E> operator * (const Float<M, E>& flt) const { + Float<M, E> operator*(const Float<M, E>& flt) const { if ((this->value == 0.f && !std::isnan(flt.value)) || (flt.value == 0.f && !std::isnan(this->value))) // PICA gives 0 instead of NaN when multiplying by inf @@ -66,67 +65,68 @@ public: return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32()); } - Float<M, E> operator / (const Float<M, E>& flt) const { + Float<M, E> operator/(const Float<M, E>& flt) const { return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32()); } - Float<M, E> operator + (const Float<M, E>& flt) const { + Float<M, E> operator+(const Float<M, E>& flt) const { return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32()); } - Float<M, E> operator - (const Float<M, E>& flt) const { + Float<M, E> operator-(const Float<M, E>& flt) const { return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32()); } - Float<M, E>& operator *= (const Float<M, E>& flt) { + Float<M, E>& operator*=(const Float<M, E>& flt) { if ((this->value == 0.f && !std::isnan(flt.value)) || (flt.value == 0.f && !std::isnan(this->value))) // PICA gives 0 instead of NaN when multiplying by inf *this = Zero(); - else value *= flt.ToFloat32(); + else + value *= flt.ToFloat32(); return *this; } - Float<M, E>& operator /= (const Float<M, E>& flt) { + Float<M, E>& operator/=(const Float<M, E>& flt) { value /= flt.ToFloat32(); return *this; } - Float<M, E>& operator += (const Float<M, E>& flt) { + Float<M, E>& operator+=(const Float<M, E>& flt) { value += flt.ToFloat32(); return *this; } - Float<M, E>& operator -= (const Float<M, E>& flt) { + Float<M, E>& operator-=(const Float<M, E>& flt) { value -= flt.ToFloat32(); return *this; } - Float<M, E> operator - () const { + Float<M, E> operator-() const { return Float<M, E>::FromFloat32(-ToFloat32()); } - bool operator < (const Float<M, E>& flt) const { + bool operator<(const Float<M, E>& flt) const { return ToFloat32() < flt.ToFloat32(); } - bool operator > (const Float<M, E>& flt) const { + bool operator>(const Float<M, E>& flt) const { return ToFloat32() > flt.ToFloat32(); } - bool operator >= (const Float<M, E>& flt) const { + bool operator>=(const Float<M, E>& flt) const { return ToFloat32() >= flt.ToFloat32(); } - bool operator <= (const Float<M, E>& flt) const { + bool operator<=(const Float<M, E>& flt) const { return ToFloat32() <= flt.ToFloat32(); } - bool operator == (const Float<M, E>& flt) const { + bool operator==(const Float<M, E>& flt) const { return ToFloat32() == flt.ToFloat32(); } - bool operator != (const Float<M, E>& flt) const { + bool operator!=(const Float<M, E>& flt) const { return ToFloat32() != flt.ToFloat32(); } diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index 68ea3c08a..be7377290 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp @@ -3,69 +3,66 @@ // Refer to the license.txt file included. #include "common/logging/log.h" - #include "video_core/pica.h" #include "video_core/primitive_assembly.h" #include "video_core/shader/shader.h" namespace Pica { -template<typename VertexType> +template <typename VertexType> PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topology) - : topology(topology), buffer_index(0) { -} + : topology(topology), buffer_index(0) {} -template<typename VertexType> -void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler) -{ +template <typename VertexType> +void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, + TriangleHandler triangle_handler) { switch (topology) { - // TODO: Figure out what's different with TriangleTopology::Shader. - case Regs::TriangleTopology::List: - case Regs::TriangleTopology::Shader: - if (buffer_index < 2) { - buffer[buffer_index++] = vtx; - } else { - buffer_index = 0; + // TODO: Figure out what's different with TriangleTopology::Shader. + case Regs::TriangleTopology::List: + case Regs::TriangleTopology::Shader: + if (buffer_index < 2) { + buffer[buffer_index++] = vtx; + } else { + buffer_index = 0; - triangle_handler(buffer[0], buffer[1], vtx); - } - break; + triangle_handler(buffer[0], buffer[1], vtx); + } + break; - case Regs::TriangleTopology::Strip: - case Regs::TriangleTopology::Fan: - if (strip_ready) - triangle_handler(buffer[0], buffer[1], vtx); + case Regs::TriangleTopology::Strip: + case Regs::TriangleTopology::Fan: + if (strip_ready) + triangle_handler(buffer[0], buffer[1], vtx); - buffer[buffer_index] = vtx; + buffer[buffer_index] = vtx; - strip_ready |= (buffer_index == 1); + strip_ready |= (buffer_index == 1); - if (topology == Regs::TriangleTopology::Strip) - buffer_index = !buffer_index; - else if (topology == Regs::TriangleTopology::Fan) - buffer_index = 1; - break; + if (topology == Regs::TriangleTopology::Strip) + buffer_index = !buffer_index; + else if (topology == Regs::TriangleTopology::Fan) + buffer_index = 1; + break; - default: - LOG_ERROR(HW_GPU, "Unknown triangle topology %x:", (int)topology); - break; + default: + LOG_ERROR(HW_GPU, "Unknown triangle topology %x:", (int)topology); + break; } } -template<typename VertexType> +template <typename VertexType> void PrimitiveAssembler<VertexType>::Reset() { buffer_index = 0; strip_ready = false; } -template<typename VertexType> +template <typename VertexType> void PrimitiveAssembler<VertexType>::Reconfigure(Regs::TriangleTopology topology) { Reset(); this->topology = topology; } // explicitly instantiate use cases -template -struct PrimitiveAssembler<Shader::OutputVertex>; +template struct PrimitiveAssembler<Shader::OutputVertex>; } // namespace diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h index 9396b4c85..0384d5984 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/primitive_assembly.h @@ -5,7 +5,6 @@ #pragma once #include <functional> - #include "video_core/pica.h" namespace Pica { @@ -14,11 +13,9 @@ namespace Pica { * Utility class to build triangles from a series of vertices, * according to a given triangle topology. */ -template<typename VertexType> +template <typename VertexType> struct PrimitiveAssembler { - using TriangleHandler = std::function<void(VertexType& v0, - VertexType& v1, - VertexType& v2)>; + using TriangleHandler = std::function<void(VertexType& v0, VertexType& v1, VertexType& v2)>; PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List); @@ -48,5 +45,4 @@ private: bool strip_ready = false; }; - } // namespace diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 6f369a00e..6c4bbed33 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -5,7 +5,6 @@ #include <algorithm> #include <array> #include <cmath> - #include "common/assert.h" #include "common/bit_field.h" #include "common/color.h" @@ -14,17 +13,15 @@ #include "common/math_util.h" #include "common/microprofile.h" #include "common/vector_math.h" - -#include "core/memory.h" #include "core/hw/gpu.h" - +#include "core/memory.h" #include "video_core/debug_utils/debug_utils.h" #include "video_core/pica.h" #include "video_core/pica_state.h" #include "video_core/pica_types.h" #include "video_core/rasterizer.h" -#include "video_core/utils.h" #include "video_core/shader/shader.h" +#include "video_core/utils.h" namespace Pica { @@ -39,8 +36,10 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { y = framebuffer.height - y; const u32 coarse_y = y & ~7; - u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); - u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; + u32 bytes_per_pixel = + GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + + coarse_y * framebuffer.width * bytes_per_pixel; u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; switch (framebuffer.color_format) { @@ -65,7 +64,8 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { break; default: - LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); + LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", + framebuffer.color_format.Value()); UNIMPLEMENTED(); } } @@ -77,8 +77,10 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { y = framebuffer.height - y; const u32 coarse_y = y & ~7; - u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); - u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; + u32 bytes_per_pixel = + GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); + u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + + coarse_y * framebuffer.width * bytes_per_pixel; u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; switch (framebuffer.color_format) { @@ -98,7 +100,8 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { return Color::DecodeRGBA4(src_pixel); default: - LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); + LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", + framebuffer.color_format.Value()); UNIMPLEMENTED(); } @@ -120,16 +123,16 @@ static u32 GetDepth(int x, int y) { u8* src_pixel = depth_buffer + src_offset; switch (framebuffer.depth_format) { - case Regs::DepthFormat::D16: - return Color::DecodeD16(src_pixel); - case Regs::DepthFormat::D24: - return Color::DecodeD24(src_pixel); - case Regs::DepthFormat::D24S8: - return Color::DecodeD24S8(src_pixel).x; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); - UNIMPLEMENTED(); - return 0; + case Regs::DepthFormat::D16: + return Color::DecodeD16(src_pixel); + case Regs::DepthFormat::D24: + return Color::DecodeD24(src_pixel); + case Regs::DepthFormat::D24S8: + return Color::DecodeD24S8(src_pixel).x; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); + UNIMPLEMENTED(); + return 0; } } @@ -148,12 +151,15 @@ static u8 GetStencil(int x, int y) { u8* src_pixel = depth_buffer + src_offset; switch (framebuffer.depth_format) { - case Regs::DepthFormat::D24S8: - return Color::DecodeD24S8(src_pixel).y; + case Regs::DepthFormat::D24S8: + return Color::DecodeD24S8(src_pixel).y; - default: - LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", framebuffer.depth_format); - return 0; + default: + LOG_WARNING( + HW_GPU, + "GetStencil called for function which doesn't have a stencil component (format %u)", + framebuffer.depth_format); + return 0; } } @@ -172,22 +178,22 @@ static void SetDepth(int x, int y, u32 value) { u8* dst_pixel = depth_buffer + dst_offset; switch (framebuffer.depth_format) { - case Regs::DepthFormat::D16: - Color::EncodeD16(value, dst_pixel); - break; - - case Regs::DepthFormat::D24: - Color::EncodeD24(value, dst_pixel); - break; - - case Regs::DepthFormat::D24S8: - Color::EncodeD24X8(value, dst_pixel); - break; - - default: - LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); - UNIMPLEMENTED(); - break; + case Regs::DepthFormat::D16: + Color::EncodeD16(value, dst_pixel); + break; + + case Regs::DepthFormat::D24: + Color::EncodeD24(value, dst_pixel); + break; + + case Regs::DepthFormat::D24S8: + Color::EncodeD24X8(value, dst_pixel); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); + UNIMPLEMENTED(); + break; } } @@ -206,19 +212,19 @@ static void SetStencil(int x, int y, u8 value) { u8* dst_pixel = depth_buffer + dst_offset; switch (framebuffer.depth_format) { - case Pica::Regs::DepthFormat::D16: - case Pica::Regs::DepthFormat::D24: - // Nothing to do - break; - - case Pica::Regs::DepthFormat::D24S8: - Color::EncodeX24S8(value, dst_pixel); - break; - - default: - LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); - UNIMPLEMENTED(); - break; + case Pica::Regs::DepthFormat::D16: + case Pica::Regs::DepthFormat::D24: + // Nothing to do + break; + + case Pica::Regs::DepthFormat::D24S8: + Color::EncodeX24S8(value, dst_pixel); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); + UNIMPLEMENTED(); + break; } } @@ -262,15 +268,19 @@ struct Fix12P4 { Fix12P4() {} Fix12P4(u16 val) : val(val) {} - static u16 FracMask() { return 0xF; } - static u16 IntMask() { return (u16)~0xF; } + static u16 FracMask() { + return 0xF; + } + static u16 IntMask() { + return (u16)~0xF; + } operator u16() const { return val; } - bool operator < (const Fix12P4& oth) const { - return (u16)*this < (u16)oth; + bool operator<(const Fix12P4& oth) const { + return (u16) * this < (u16)oth; } private: @@ -283,9 +293,8 @@ private: * * @todo define orientation concretely. */ -static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, - const Math::Vec2<Fix12P4>& vtx2, - const Math::Vec2<Fix12P4>& vtx3) { +static int SignedArea(const Math::Vec2<Fix12P4>& vtx1, const Math::Vec2<Fix12P4>& vtx2, + const Math::Vec2<Fix12P4>& vtx3) { const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); // TODO: There is a very small chance this will overflow for sizeof(int) == 4 @@ -298,11 +307,8 @@ MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 24 * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing * culling via recursion. */ -static void ProcessTriangleInternal(const Shader::OutputVertex& v0, - const Shader::OutputVertex& v1, - const Shader::OutputVertex& v2, - bool reversed = false) -{ +static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, + const Shader::OutputVertex& v2, bool reversed = false) { const auto& regs = g_state.regs; MICROPROFILE_SCOPE(GPU_Rasterization); @@ -316,9 +322,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; }; - Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), - ScreenToRasterizerCoordinates(v1.screenpos), - ScreenToRasterizerCoordinates(v2.screenpos) }; + Math::Vec3<Fix12P4> vtxpos[3]{ScreenToRasterizerCoordinates(v0.screenpos), + ScreenToRasterizerCoordinates(v1.screenpos), + ScreenToRasterizerCoordinates(v2.screenpos)}; if (regs.cull_mode == Regs::CullMode::KeepAll) { // Make sure we always end up with a triangle wound counter-clockwise @@ -344,8 +350,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); // Convert the scissor box coordinates to 12.4 fixed point - u16 scissor_x1 = (u16)( regs.scissor_test.x1 << 4); - u16 scissor_y1 = (u16)( regs.scissor_test.y1 << 4); + u16 scissor_x1 = (u16)(regs.scissor_test.x1 << 4); + u16 scissor_y1 = (u16)(regs.scissor_test.y1 << 4); // x2,y2 have +1 added to cover the entire sub-pixel area u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4); u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4); @@ -369,27 +375,32 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones... auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx, const Math::Vec2<Fix12P4>& line1, - const Math::Vec2<Fix12P4>& line2) - { + const Math::Vec2<Fix12P4>& line2) { if (line1.y == line2.y) { // just check if vertex is above us => bottom line parallel to x-axis return vtx.y < line1.y; } else { // check if vertex is on our left => right side // TODO: Not sure how likely this is to overflow - return (int)vtx.x < (int)line1.x + ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / ((int)line2.y - (int)line1.y); + return (int)vtx.x < (int)line1.x + + ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / + ((int)line2.y - (int)line1.y); } }; - int bias0 = IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; - int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; - int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; + int bias0 = + IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; + int bias1 = + IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; + int bias2 = + IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); auto textures = regs.GetTextures(); auto tev_stages = regs.GetTevStages(); - bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; + bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && + g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; const auto stencil_test = g_state.regs.output_merger.stencil_test; // Enter rasterization loop, starting at the center of the topleft bounding box corner. @@ -397,10 +408,10 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, for (u16 y = min_y + 8; y < max_y; y += 0x10) { for (u16 x = min_x + 8; x < max_x; x += 0x10) { - // Do not process the pixel if it's inside the scissor box and the scissor mode is set to Exclude + // Do not process the pixel if it's inside the scissor box and the scissor mode is set + // to Exclude if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) { - if (x >= scissor_x1 && x < scissor_x2 && - y >= scissor_y1 && y < scissor_y2) + if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) continue; } @@ -414,15 +425,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, if (w0 < 0 || w1 < 0 || w2 < 0) continue; - auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), - float24::FromFloat32(static_cast<float>(w1)), - float24::FromFloat32(static_cast<float>(w2))); - float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates); + auto baricentric_coordinates = + Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), + float24::FromFloat32(static_cast<float>(w1)), + float24::FromFloat32(static_cast<float>(w2))); + float24 interpolated_w_inverse = + float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates); // interpolated_z = z / w - float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 + - v1.screenpos[2].ToFloat32() * w1 + - v2.screenpos[2].ToFloat32() * w2) / wsum; + float interpolated_z_over_w = + (v0.screenpos[2].ToFloat32() * w0 + v1.screenpos[2].ToFloat32() * w1 + + v2.screenpos[2].ToFloat32() * w2) / + wsum; // Not fully accurate. About 3 bits in precision are missing. // Z-Buffer (z / w * scale + offset) @@ -461,10 +475,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, }; Math::Vec4<u8> primary_color{ - (u8)(GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * 255), - (u8)(GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * 255), - (u8)(GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * 255), - (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) + (u8)( + GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * + 255), + (u8)( + GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * + 255), + (u8)( + GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * + 255), + (u8)( + GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * + 255), }; Math::Vec2<float24> uv[3]; @@ -489,7 +511,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, // Only unit 0 respects the texturing type (according to 3DBrew) // TODO: Refactor so cubemaps and shadowmaps can be handled if (i == 0) { - switch(texture.config.type) { + switch (texture.config.type) { case Regs::TextureConfig::Texture2D: break; case Regs::TextureConfig::Projection2D: { @@ -506,51 +528,58 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } } - int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); - int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); + int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))) + .ToFloat32(); + int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))) + .ToFloat32(); - - static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { + static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, + unsigned size) { switch (mode) { - case Regs::TextureConfig::ClampToEdge: - val = std::max(val, 0); - val = std::min(val, (int)size - 1); - return val; - - case Regs::TextureConfig::ClampToBorder: - return val; - - case Regs::TextureConfig::Repeat: - return (int)((unsigned)val % size); - - case Regs::TextureConfig::MirroredRepeat: - { - unsigned int coord = ((unsigned)val % (2 * size)); - if (coord >= size) - coord = 2 * size - 1 - coord; - return (int)coord; - } - - default: - LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); - UNIMPLEMENTED(); - return 0; + case Regs::TextureConfig::ClampToEdge: + val = std::max(val, 0); + val = std::min(val, (int)size - 1); + return val; + + case Regs::TextureConfig::ClampToBorder: + return val; + + case Regs::TextureConfig::Repeat: + return (int)((unsigned)val % size); + + case Regs::TextureConfig::MirroredRepeat: { + unsigned int coord = ((unsigned)val % (2 * size)); + if (coord >= size) + coord = 2 * size - 1 - coord; + return (int)coord; + } + + default: + LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); + UNIMPLEMENTED(); + return 0; } }; - if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder && (s < 0 || s >= texture.config.width)) - || (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder && (t < 0 || t >= texture.config.height))) { + if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder && + (s < 0 || s >= texture.config.width)) || + (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder && + (t < 0 || t >= texture.config.height))) { auto border_color = texture.config.border_color; - texture_color[i] = { border_color.r, border_color.g, border_color.b, border_color.a }; + texture_color[i] = {border_color.r, border_color.g, border_color.b, + border_color.a}; } else { // Textures are laid out from bottom to top, hence we invert the t coordinate. // NOTE: This may not be the right place for the inversion. // TODO: Check if this applies to ETC textures, too. s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); - t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); + t = texture.config.height - 1 - + GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); - u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); - auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); + u8* texture_data = + Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); + auto info = + DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); // TODO: Apply the min and mag filters to the texture texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); @@ -571,10 +600,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0}; Math::Vec4<u8> next_combiner_buffer = { regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g, - regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a + regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a, }; - for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { + for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); + ++tev_stage_index) { const auto& tev_stage = tev_stages[tev_stage_index]; using Source = Regs::TevStageConfig::Source; using ColorModifier = Regs::TevStageConfig::ColorModifier; @@ -606,7 +636,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, return combiner_buffer; case Source::Constant: - return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; + return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, + tev_stage.const_a}; case Source::Previous: return combiner_output; @@ -618,7 +649,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } }; - static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { + static auto GetColorModifier = [](ColorModifier factor, + const Math::Vec4<u8>& values) -> Math::Vec3<u8> { switch (factor) { case ColorModifier::SourceColor: return values.rgb(); @@ -652,7 +684,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } }; - static auto GetAlphaModifier = [](AlphaModifier factor, const Math::Vec4<u8>& values) -> u8 { + static auto GetAlphaModifier = [](AlphaModifier factor, + const Math::Vec4<u8>& values) -> u8 { switch (factor) { case AlphaModifier::SourceAlpha: return values.a(); @@ -680,7 +713,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } }; - static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { + static auto ColorCombine = [](Operation op, + const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { switch (op) { case Operation::Replace: return input[0]; @@ -688,8 +722,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, case Operation::Modulate: return ((input[0] * input[1]) / 255).Cast<u8>(); - case Operation::Add: - { + case Operation::Add: { auto result = input[0] + input[1]; result.r() = std::min(255, result.r()); result.g() = std::min(255, result.g()); @@ -697,10 +730,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, return result.Cast<u8>(); } - case Operation::AddSigned: - { - // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct - auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128); + case Operation::AddSigned: { + // TODO(bunnei): Verify that the color conversion from (float) 0.5f to + // (byte) 128 is correct + auto result = input[0].Cast<int>() + input[1].Cast<int>() - + Math::MakeVec<int>(128, 128, 128); result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); @@ -708,10 +742,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } case Operation::Lerp: - return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); + return ((input[0] * input[2] + + input[1] * + (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / + 255) + .Cast<u8>(); - case Operation::Subtract: - { + case Operation::Subtract: { auto result = input[0].Cast<int>() - input[1].Cast<int>(); result.r() = std::max(0, result.r()); result.g() = std::max(0, result.g()); @@ -719,8 +756,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, return result.Cast<u8>(); } - case Operation::MultiplyThenAdd: - { + case Operation::MultiplyThenAdd: { auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; result.r() = std::min(255, result.r()); result.g() = std::min(255, result.g()); @@ -728,8 +764,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, return result.Cast<u8>(); } - case Operation::AddThenMultiply: - { + case Operation::AddThenMultiply: { auto result = input[0] + input[1]; result.r() = std::min(255, result.r()); result.g() = std::min(255, result.g()); @@ -737,17 +772,19 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, result = (result * input[2].Cast<int>()) / 255; return result.Cast<u8>(); } - case Operation::Dot3_RGB: - { + case Operation::Dot3_RGB: { // Not fully accurate. // Worst case scenario seems to yield a +/-3 error - // Some HW results indicate that the per-component computation can't have a higher precision than 1/256, - // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( (0x80,g0,b0),(0x80,g1,b1) ) give different results - int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + - ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + - ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; + // Some HW results indicate that the per-component computation can't have a + // higher precision than 1/256, + // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( + // (0x80,g0,b0),(0x80,g1,b1) ) give different results + int result = + ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + + ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + + ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; result = std::max(0, std::min(255, result)); - return { (u8)result, (u8)result, (u8)result }; + return {(u8)result, (u8)result, (u8)result}; } default: LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); @@ -756,7 +793,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } }; - static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { + static auto AlphaCombine = [](Operation op, const std::array<u8, 3>& input) -> u8 { switch (op) { case Operation::Replace: return input[0]; @@ -767,9 +804,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, case Operation::Add: return std::min(255, input[0] + input[1]); - case Operation::AddSigned: - { - // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct + case Operation::AddSigned: { + // TODO(bunnei): Verify that the color conversion from (float) 0.5f to + // (byte) 128 is correct auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); } @@ -801,32 +838,38 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, Math::Vec3<u8> color_result[3] = { GetColorModifier(tev_stage.color_modifier1, GetSource(tev_stage.color_source1)), GetColorModifier(tev_stage.color_modifier2, GetSource(tev_stage.color_source2)), - GetColorModifier(tev_stage.color_modifier3, GetSource(tev_stage.color_source3)) + GetColorModifier(tev_stage.color_modifier3, GetSource(tev_stage.color_source3)), }; auto color_output = ColorCombine(tev_stage.color_op, color_result); // alpha combiner - std::array<u8,3> alpha_result = {{ + std::array<u8, 3> alpha_result = {{ GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)), GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)), - GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)) + GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)), }}; auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); - combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); - combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); - combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); - combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); + combiner_output[0] = + std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); + combiner_output[1] = + std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); + combiner_output[2] = + std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); + combiner_output[3] = + std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); combiner_buffer = next_combiner_buffer; - if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { + if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor( + tev_stage_index)) { next_combiner_buffer.r() = combiner_output.r(); next_combiner_buffer.g() = combiner_output.g(); next_combiner_buffer.b() = combiner_output.b(); } - if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { + if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha( + tev_stage_index)) { next_combiner_buffer.a() = combiner_output.a(); } } @@ -897,21 +940,26 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f); float fog_f = fog_index - fog_i; const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)]; - float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) / 2047.0f; // This is signed fixed point 1.11 + float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) / + 2047.0f; // This is signed fixed point 1.11 fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f); // Blend the fog for (unsigned i = 0; i < 3; i++) { - combiner_output[i] = fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i]; + combiner_output[i] = + fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i]; } } u8 old_stencil = 0; - auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) { - u8 new_stencil = PerformStencilAction(action, old_stencil, stencil_test.reference_value); + auto UpdateStencil = [stencil_test, x, y, + &old_stencil](Pica::Regs::StencilAction action) { + u8 new_stencil = + PerformStencilAction(action, old_stencil, stencil_test.reference_value); if (g_state.regs.framebuffer.allow_depth_stencil_write != 0) - SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); + SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | + (old_stencil & ~stencil_test.write_mask)); }; if (stencil_action_enable) { @@ -1030,7 +1078,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, static_cast<u8>(output_merger.blend_const.r), static_cast<u8>(output_merger.blend_const.g), static_cast<u8>(output_merger.blend_const.b), - static_cast<u8>(output_merger.blend_const.a) + static_cast<u8>(output_merger.blend_const.a), }; switch (factor) { @@ -1091,12 +1139,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, return combiner_output[channel]; }; - static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, - const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, - Regs::BlendEquation equation) { + static auto EvaluateBlendEquation = []( + const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, + const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, + Regs::BlendEquation equation) { Math::Vec4<int> result; - auto src_result = (src * srcfactor).Cast<int>(); + auto src_result = (src * srcfactor).Cast<int>(); auto dst_result = (dest * destfactor).Cast<int>(); switch (equation) { @@ -1134,10 +1183,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, UNIMPLEMENTED(); } - return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), - MathUtil::Clamp(result.g(), 0, 255), - MathUtil::Clamp(result.b(), 0, 255), - MathUtil::Clamp(result.a(), 0, 255)); + return Math::Vec4<u8>( + MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255), + MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255)); }; auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), @@ -1150,8 +1198,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, LookupFactor(2, params.factor_dest_rgb), LookupFactor(3, params.factor_dest_a)); - blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); - blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); + blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, + params.blend_equation_rgb); + blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, + dstfactor, params.blend_equation_a) + .a(); } else { static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 { switch (op) { @@ -1205,18 +1256,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } }; - blend_output = Math::MakeVec( - LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), - LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), - LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), - LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); + blend_output = + Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), + LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), + LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), + LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); } const Math::Vec4<u8> result = { - output_merger.red_enable ? blend_output.r() : dest.r(), + output_merger.red_enable ? blend_output.r() : dest.r(), output_merger.green_enable ? blend_output.g() : dest.g(), - output_merger.blue_enable ? blend_output.b() : dest.b(), - output_merger.alpha_enable ? blend_output.a() : dest.a() + output_merger.blue_enable ? blend_output.b() : dest.b(), + output_merger.alpha_enable ? blend_output.a() : dest.a(), }; if (regs.framebuffer.allow_color_write != 0) @@ -1225,8 +1276,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } } -void ProcessTriangle(const Shader::OutputVertex& v0, - const Shader::OutputVertex& v1, +void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, const Shader::OutputVertex& v2) { ProcessTriangleInternal(v0, v1, v2); } diff --git a/src/video_core/rasterizer.h b/src/video_core/rasterizer.h index a6a9634b4..6cbda3067 100644 --- a/src/video_core/rasterizer.h +++ b/src/video_core/rasterizer.h @@ -7,13 +7,12 @@ namespace Pica { namespace Shader { - struct OutputVertex; +struct OutputVertex; } namespace Rasterizer { -void ProcessTriangle(const Shader::OutputVertex& v0, - const Shader::OutputVertex& v1, +void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, const Shader::OutputVertex& v2); } // namespace Rasterizer diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index bf7101665..71df233b5 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -5,7 +5,6 @@ #pragma once #include "common/common_types.h" - #include "core/hw/gpu.h" struct ScreenInfo; @@ -39,17 +38,25 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory virtual void FlushRegion(PAddr addr, u32 size) = 0; - /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory and invalidated + /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory + /// and invalidated virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; /// Attempt to use a faster method to perform a display transfer - virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { return false; } + virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { + return false; + } /// Attempt to use a faster method to fill a region - virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { return false; } + virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { + return false; + } /// Attempt to use a faster method to display the framebuffer to screen - virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { return false; } + virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, + PAddr framebuffer_addr, u32 pixel_stride, + ScreenInfo& screen_info) { + return false; + } }; - } diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 3f451e062..fd38175b3 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -4,11 +4,10 @@ #include <atomic> #include <memory> - #include "video_core/renderer_base.h" -#include "video_core/video_core.h" -#include "video_core/swrasterizer.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/swrasterizer.h" +#include "video_core/video_core.h" void RendererBase::RefreshRasterizerSetting() { bool hw_renderer_enabled = VideoCore::g_hw_renderer_enabled; diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index f68091cc8..589aca857 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -5,25 +5,17 @@ #pragma once #include <memory> - #include "common/common_types.h" - #include "video_core/rasterizer_interface.h" class EmuWindow; class RendererBase : NonCopyable { public: - /// Used to reference a framebuffer - enum kFramebuffer { - kFramebuffer_VirtualXFB = 0, - kFramebuffer_EFB, - kFramebuffer_Texture - }; + enum kFramebuffer { kFramebuffer_VirtualXFB = 0, kFramebuffer_EFB, kFramebuffer_Texture }; - virtual ~RendererBase() { - } + virtual ~RendererBase() {} /// Swap buffers (render frame) virtual void SwapBuffers() = 0; @@ -59,8 +51,8 @@ public: protected: std::unique_ptr<VideoCore::RasterizerInterface> rasterizer; - f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer - int m_current_frame = 0; ///< Current frame, should be set by the renderer + f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer + int m_current_frame = 0; ///< Current frame, should be set by the renderer private: bool opengl_rasterizer_active = false; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f8393c618..60c9d9180 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -6,17 +6,13 @@ #include <string> #include <tuple> #include <utility> - #include <glad/glad.h> - #include "common/assert.h" #include "common/color.h" #include "common/logging/log.h" #include "common/math_util.h" #include "common/vector_math.h" - #include "core/hw/gpu.h" - #include "video_core/pica.h" #include "video_core/pica_state.h" #include "video_core/renderer_opengl/gl_rasterizer.h" @@ -32,8 +28,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) { stage.alpha_source1 == Pica::Regs::TevStageConfig::Source::Previous && stage.color_modifier1 == Pica::Regs::TevStageConfig::ColorModifier::SourceColor && stage.alpha_modifier1 == Pica::Regs::TevStageConfig::AlphaModifier::SourceAlpha && - stage.GetColorMultiplier() == 1 && - stage.GetAlphaMultiplier() == 1); + stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); } RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { @@ -65,26 +60,34 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { uniform_block_data.fog_lut_dirty = true; // Set vertex attributes - glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); + glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, + sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); - glVertexAttribPointer(GLShader::ATTRIBUTE_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, color)); + glVertexAttribPointer(GLShader::ATTRIBUTE_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), + (GLvoid*)offsetof(HardwareVertex, color)); glEnableVertexAttribArray(GLShader::ATTRIBUTE_COLOR); - glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0)); - glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD1, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord1)); - glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD2, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord2)); + glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0, 2, GL_FLOAT, GL_FALSE, + sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0)); + glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD1, 2, GL_FLOAT, GL_FALSE, + sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord1)); + glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD2, 2, GL_FLOAT, GL_FALSE, + sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord2)); glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0); glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); - glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w)); + glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, + sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w)); glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W); - glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); + glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, + sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); - glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); + glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), + (GLvoid*)offsetof(HardwareVertex, view)); glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); // Create render framebuffer @@ -129,9 +132,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { SyncDepthWriteMask(); } -RasterizerOpenGL::~RasterizerOpenGL() { - -} +RasterizerOpenGL::~RasterizerOpenGL() {} /** * This is a helper function to resolve an issue with opposite quaternions being interpolated by @@ -149,8 +150,8 @@ RasterizerOpenGL::~RasterizerOpenGL() { * manually using two Lerps, and doing this correction before each Lerp. */ static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) { - Math::Vec4f a{ qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32() }; - Math::Vec4f b{ qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32() }; + Math::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()}; + Math::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()}; return (Math::Dot(a, b) < 0.f); } @@ -173,15 +174,20 @@ void RasterizerOpenGL::DrawTriangles() { CachedSurface* color_surface; CachedSurface* depth_surface; MathUtil::Rectangle<int> rect; - std::tie(color_surface, depth_surface, rect) = res_cache.GetFramebufferSurfaces(regs.framebuffer); + std::tie(color_surface, depth_surface, rect) = + res_cache.GetFramebufferSurfaces(regs.framebuffer); state.draw.draw_framebuffer = framebuffer.handle; state.Apply(); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? color_surface->texture.handle : 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_surface != nullptr ? depth_surface->texture.handle : 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + color_surface != nullptr ? color_surface->texture.handle : 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + depth_surface != nullptr ? depth_surface->texture.handle : 0, 0); bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0); + glFramebufferTexture2D( + GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0); if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { return; @@ -194,7 +200,8 @@ void RasterizerOpenGL::DrawTriangles() { glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width), (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height), - (GLsizei)(viewport_width * color_surface->res_scale_width), (GLsizei)(viewport_height * color_surface->res_scale_height)); + (GLsizei)(viewport_width * color_surface->res_scale_width), + (GLsizei)(viewport_height * color_surface->res_scale_height)); if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width || uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) { @@ -245,14 +252,16 @@ void RasterizerOpenGL::DrawTriangles() { // Sync the uniform data if (uniform_block_data.dirty) { - glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); + glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, + GL_STATIC_DRAW); uniform_block_data.dirty = false; } state.Apply(); // Draw the vertex batch - glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW); + glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), + GL_STREAM_DRAW); glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); // Mark framebuffer surfaces as dirty @@ -278,7 +287,7 @@ void RasterizerOpenGL::DrawTriangles() { void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { const auto& regs = Pica::g_state.regs; - switch(id) { + switch (id) { // Culling case PICA_REG_INDEX(cull_mode): SyncCullMode(); @@ -548,7 +557,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncLightAmbient(7); break; - // Fragment lighting position + // Fragment lighting position case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10): case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10): SyncLightPosition(0); @@ -659,13 +668,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): - case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): - { + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): { auto& lut_config = regs.lighting.lut_config; uniform_block_data.lut_dirty[lut_config.type / 4] = true; break; } - } } @@ -699,8 +706,10 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe CachedSurface dst_params; dst_params.addr = config.GetPhysicalOutputAddress(); - dst_params.width = config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value(); - dst_params.height = config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value(); + dst_params.width = + config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value(); + dst_params.height = + config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value(); dst_params.is_tiled = config.input_linear != config.dont_swizzle; dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format); @@ -735,7 +744,8 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe return false; } - u32 dst_size = dst_params.width * dst_params.height * CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8; + u32 dst_size = dst_params.width * dst_params.height * + CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8; dst_surface->dirty = true; res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true); return true; @@ -757,12 +767,15 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) GLuint old_fb = cur_state.draw.draw_framebuffer; cur_state.draw.draw_framebuffer = framebuffer.handle; - // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so Clear call isn't affected + // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so + // Clear call isn't affected cur_state.Apply(); if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_surface->texture.handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + dst_surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { return false; @@ -770,8 +783,10 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - // TODO: Handle additional pixel format and fill value size combinations to accelerate more cases - // For instance, checking if fill value's bytes/bits repeat to allow filling I8/A8/I4/A4/... + // TODO: Handle additional pixel format and fill value size combinations to accelerate more + // cases + // For instance, checking if fill value's bytes/bits repeat to allow filling + // I8/A8/I4/A4/... // Currently only handles formats that are multiples of the fill value size if (config.fill_24bit) { @@ -846,7 +861,8 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) glClearBufferfv(GL_COLOR, 0, color_values); } else if (dst_type == SurfaceType::Depth) { glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + dst_surface->texture.handle, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { @@ -865,7 +881,8 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) glClearBufferfv(GL_DEPTH, 0, &value_float); } else if (dst_type == SurfaceType::DepthStencil) { glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + dst_surface->texture.handle, 0); if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { return false; @@ -889,7 +906,9 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) return true; } -bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { +bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, + PAddr framebuffer_addr, u32 pixel_stride, + ScreenInfo& screen_info) { if (framebuffer_addr == 0) { return false; } @@ -912,10 +931,9 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con u32 scaled_width = src_surface->GetScaledWidth(); u32 scaled_height = src_surface->GetScaledHeight(); - screen_info.display_texcoords = MathUtil::Rectangle<float>((float)src_rect.top / (float)scaled_height, - (float)src_rect.left / (float)scaled_width, - (float)src_rect.bottom / (float)scaled_height, - (float)src_rect.right / (float)scaled_width); + screen_info.display_texcoords = MathUtil::Rectangle<float>( + (float)src_rect.top / (float)scaled_height, (float)src_rect.left / (float)scaled_width, + (float)src_rect.bottom / (float)scaled_height, (float)src_rect.right / (float)scaled_width); screen_info.display_texture = src_surface->texture.handle; @@ -928,7 +946,8 @@ void RasterizerOpenGL::SamplerInfo::Create() { wrap_s = wrap_t = TextureConfig::Repeat; border_color = 0; - glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); // default is GL_LINEAR_MIPMAP_LINEAR + glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, + GL_LINEAR); // default is GL_LINEAR_MIPMAP_LINEAR // Other attributes have correct defaults } @@ -976,41 +995,64 @@ void RasterizerOpenGL::SetShader() { } else { LOG_DEBUG(Render_OpenGL, "Creating new shader"); - shader->shader.Create(GLShader::GenerateVertexShader().c_str(), GLShader::GenerateFragmentShader(config).c_str()); + shader->shader.Create(GLShader::GenerateVertexShader().c_str(), + GLShader::GenerateFragmentShader(config).c_str()); state.draw.shader_program = shader->shader.handle; state.Apply(); // Set the texture samplers to correspond to different texture units GLuint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]"); - if (uniform_tex != -1) { glUniform1i(uniform_tex, 0); } + if (uniform_tex != -1) { + glUniform1i(uniform_tex, 0); + } uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[1]"); - if (uniform_tex != -1) { glUniform1i(uniform_tex, 1); } + if (uniform_tex != -1) { + glUniform1i(uniform_tex, 1); + } uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); - if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); } + if (uniform_tex != -1) { + glUniform1i(uniform_tex, 2); + } // Set the texture samplers to correspond to different lookup table texture units GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]"); - if (uniform_lut != -1) { glUniform1i(uniform_lut, 3); } + if (uniform_lut != -1) { + glUniform1i(uniform_lut, 3); + } uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]"); - if (uniform_lut != -1) { glUniform1i(uniform_lut, 4); } + if (uniform_lut != -1) { + glUniform1i(uniform_lut, 4); + } uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]"); - if (uniform_lut != -1) { glUniform1i(uniform_lut, 5); } + if (uniform_lut != -1) { + glUniform1i(uniform_lut, 5); + } uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]"); - if (uniform_lut != -1) { glUniform1i(uniform_lut, 6); } + if (uniform_lut != -1) { + glUniform1i(uniform_lut, 6); + } uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]"); - if (uniform_lut != -1) { glUniform1i(uniform_lut, 7); } + if (uniform_lut != -1) { + glUniform1i(uniform_lut, 7); + } uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]"); - if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); } + if (uniform_lut != -1) { + glUniform1i(uniform_lut, 8); + } GLuint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut"); - if (uniform_fog_lut != -1) { glUniform1i(uniform_fog_lut, 9); } + if (uniform_fog_lut != -1) { + glUniform1i(uniform_fog_lut, 9); + } current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); - unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); + unsigned int block_index = + glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); GLint block_size; - glGetActiveUniformBlockiv(current_shader->shader.handle, block_index, GL_UNIFORM_BLOCK_DATA_SIZE, &block_size); + glGetActiveUniformBlockiv(current_shader->shader.handle, block_index, + GL_UNIFORM_BLOCK_DATA_SIZE, &block_size); ASSERT_MSG(block_size == sizeof(UniformData), "Uniform block size did not match!"); glUniformBlockBinding(current_shader->shader.handle, block_index, 0); @@ -1073,7 +1115,8 @@ void RasterizerOpenGL::SyncDepthScale() { } void RasterizerOpenGL::SyncDepthOffset() { - float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); + float depth_offset = + Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); if (depth_offset != uniform_block_data.data.depth_offset) { uniform_block_data.data.depth_offset = depth_offset; uniform_block_data.dirty = true; @@ -1086,10 +1129,14 @@ void RasterizerOpenGL::SyncBlendEnabled() { void RasterizerOpenGL::SyncBlendFuncs() { const auto& regs = Pica::g_state.regs; - state.blend.rgb_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb); - state.blend.a_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a); - state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb); - state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb); + state.blend.rgb_equation = + PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb); + state.blend.a_equation = + PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a); + state.blend.src_rgb_func = + PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb); + state.blend.dst_rgb_func = + PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb); state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a); state.blend.dst_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_a); } @@ -1105,9 +1152,8 @@ void RasterizerOpenGL::SyncBlendColor() { void RasterizerOpenGL::SyncFogColor() { const auto& regs = Pica::g_state.regs; uniform_block_data.data.fog_color = { - regs.fog_color.r.Value() / 255.0f, - regs.fog_color.g.Value() / 255.0f, - regs.fog_color.b.Value() / 255.0f + regs.fog_color.r.Value() / 255.0f, regs.fog_color.g.Value() / 255.0f, + regs.fog_color.b.Value() / 255.0f, }; uniform_block_data.dirty = true; } @@ -1115,14 +1161,14 @@ void RasterizerOpenGL::SyncFogColor() { void RasterizerOpenGL::SyncFogLUT() { std::array<GLuint, 128> new_data; - std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), [](const auto& entry) { - return entry.raw; - }); + std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), + [](const auto& entry) { return entry.raw; }); if (new_data != fog_lut_data) { fog_lut_data = new_data; glActiveTexture(GL_TEXTURE9); - glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RED_INTEGER, GL_UNSIGNED_INT, fog_lut_data.data()); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RED_INTEGER, GL_UNSIGNED_INT, + fog_lut_data.data()); } } @@ -1154,34 +1200,40 @@ void RasterizerOpenGL::SyncColorWriteMask() { void RasterizerOpenGL::SyncStencilWriteMask() { const auto& regs = Pica::g_state.regs; state.stencil.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0) - ? static_cast<GLuint>(regs.output_merger.stencil_test.write_mask) - : 0; + ? static_cast<GLuint>(regs.output_merger.stencil_test.write_mask) + : 0; } void RasterizerOpenGL::SyncDepthWriteMask() { const auto& regs = Pica::g_state.regs; - state.depth.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0 && regs.output_merger.depth_write_enable) - ? GL_TRUE - : GL_FALSE; + state.depth.write_mask = + (regs.framebuffer.allow_depth_stencil_write != 0 && regs.output_merger.depth_write_enable) + ? GL_TRUE + : GL_FALSE; } void RasterizerOpenGL::SyncStencilTest() { const auto& regs = Pica::g_state.regs; - state.stencil.test_enabled = regs.output_merger.stencil_test.enable && regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; + state.stencil.test_enabled = regs.output_merger.stencil_test.enable && + regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func); state.stencil.test_ref = regs.output_merger.stencil_test.reference_value; state.stencil.test_mask = regs.output_merger.stencil_test.input_mask; - state.stencil.action_stencil_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail); - state.stencil.action_depth_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail); - state.stencil.action_depth_pass = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass); + state.stencil.action_stencil_fail = + PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail); + state.stencil.action_depth_fail = + PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail); + state.stencil.action_depth_pass = + PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass); } void RasterizerOpenGL::SyncDepthTest() { const auto& regs = Pica::g_state.regs; - state.depth.test_enabled = regs.output_merger.depth_test_enable == 1 || - regs.output_merger.depth_write_enable == 1; - state.depth.test_func = regs.output_merger.depth_test_enable == 1 ? - PicaToGL::CompareFunc(regs.output_merger.depth_test_func) : GL_ALWAYS; + state.depth.test_enabled = + regs.output_merger.depth_test_enable == 1 || regs.output_merger.depth_write_enable == 1; + state.depth.test_func = regs.output_merger.depth_test_enable == 1 + ? PicaToGL::CompareFunc(regs.output_merger.depth_test_func) + : GL_ALWAYS; } void RasterizerOpenGL::SyncScissorTest() { @@ -1208,7 +1260,8 @@ void RasterizerOpenGL::SyncCombinerColor() { } } -void RasterizerOpenGL::SyncTevConstColor(int stage_index, const Pica::Regs::TevStageConfig& tev_stage) { +void RasterizerOpenGL::SyncTevConstColor(int stage_index, + const Pica::Regs::TevStageConfig& tev_stage) { auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color); if (const_color != uniform_block_data.data.const_color[stage_index]) { uniform_block_data.data.const_color[stage_index] = const_color; @@ -1237,7 +1290,8 @@ void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) { if (new_data != lighting_lut_data[lut_index]) { lighting_lut_data[lut_index] = new_data; glActiveTexture(GL_TEXTURE3 + lut_index); - glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, lighting_lut_data[lut_index].data()); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, + lighting_lut_data[lut_index].data()); } } @@ -1277,7 +1331,7 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) { GLvec3 position = { Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), - Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() }; + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32()}; if (position != uniform_block_data.data.light_src[light_index].position) { uniform_block_data.data.light_src[light_index].position = position; @@ -1286,7 +1340,9 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) { } void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) { - GLfloat dist_atten_bias = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias).ToFloat32(); + GLfloat dist_atten_bias = + Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias) + .ToFloat32(); if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) { uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias; @@ -1295,7 +1351,9 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) { } void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) { - GLfloat dist_atten_scale = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale).ToFloat32(); + GLfloat dist_atten_scale = + Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale) + .ToFloat32(); if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) { uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index c5029432b..24fefed1b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -8,18 +8,14 @@ #include <cstddef> #include <cstring> #include <memory> -#include <vector> #include <unordered_map> - +#include <vector> #include <glad/glad.h> - #include "common/bit_field.h" #include "common/common_types.h" #include "common/hash.h" #include "common/vector_math.h" - #include "core/hw/gpu.h" - #include "video_core/pica.h" #include "video_core/pica_state.h" #include "video_core/pica_types.h" @@ -40,10 +36,10 @@ struct ScreenInfo; * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) * two separate shaders sharing the same key. * - * We use a union because "implicitly-defined copy/move constructor for a union X copies the object representation of X." - * and "implicitly-defined copy assignment operator for a union X copies the object representation (3.9) of X." - * = Bytewise copy instead of memberwise copy. - * This is important because the padding bytes are included in the hash and comparison between objects. + * We use a union because "implicitly-defined copy/move constructor for a union X copies the object + * representation of X." and "implicitly-defined copy assignment operator for a union X copies the + * object representation (3.9) of X." = Bytewise copy instead of memberwise copy. This is important + * because the padding bytes are included in the hash and comparison between objects. */ union PicaShaderConfig { @@ -60,8 +56,9 @@ union PicaShaderConfig { state.depthmap_enable = regs.depthmap_enable; - state.alpha_test_func = regs.output_merger.alpha_test.enable ? - regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; + state.alpha_test_func = regs.output_merger.alpha_test.enable + ? regs.output_merger.alpha_test.func.Value() + : Pica::Regs::CompareFunc::Always; state.texture0_type = regs.texture0.type; @@ -81,9 +78,8 @@ union PicaShaderConfig { state.fog_mode = regs.fog_mode; state.fog_flip = regs.fog_flip; - state.combiner_buffer_input = - regs.tev_combiner_buffer_input.update_mask_rgb.Value() | - regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; + state.combiner_buffer_input = regs.tev_combiner_buffer_input.update_mask_rgb.Value() | + regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; // Fragment lighting @@ -95,8 +91,10 @@ union PicaShaderConfig { const auto& light = regs.lighting.light[num]; state.lighting.light[light_index].num = num; state.lighting.light[light_index].directional = light.config.directional != 0; - state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0; - state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); + state.lighting.light[light_index].two_sided_diffuse = + light.config.two_sided_diffuse != 0; + state.lighting.light[light_index].dist_atten_enable = + !regs.lighting.IsDistAttenDisabled(num); } state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0; @@ -147,7 +145,7 @@ union PicaShaderConfig { return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); } - bool operator ==(const PicaShaderConfig& o) const { + bool operator==(const PicaShaderConfig& o) const { return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0; }; @@ -212,7 +210,8 @@ union PicaShaderConfig { } state; }; #if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) -static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value, "PicaShaderConfig::State must be trivially copyable"); +static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value, + "PicaShaderConfig::State must be trivially copyable"); #endif namespace std { @@ -228,12 +227,10 @@ struct hash<PicaShaderConfig> { class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: - RasterizerOpenGL(); ~RasterizerOpenGL() override; - void AddTriangle(const Pica::Shader::OutputVertex& v0, - const Pica::Shader::OutputVertex& v1, + void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, const Pica::Shader::OutputVertex& v2) override; void DrawTriangles() override; void NotifyPicaRegisterChanged(u32 id) override; @@ -242,7 +239,8 @@ public: void FlushAndInvalidateRegion(PAddr addr, u32 size) override; bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; - bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) override; + bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, + u32 pixel_stride, ScreenInfo& screen_info) override; /// OpenGL shader generated for a given Pica register state struct PicaShader { @@ -251,13 +249,13 @@ public: }; private: - struct SamplerInfo { using TextureConfig = Pica::Regs::TextureConfig; OGLSampler sampler; - /// Creates the sampler object, initializing its state so that it's in sync with the SamplerInfo struct. + /// Creates the sampler object, initializing its state so that it's in sync with the + /// SamplerInfo struct. void Create(); /// Syncs the sampler object with the config, updating any necessary state. void SyncWithConfig(const TextureConfig& config); @@ -343,8 +341,11 @@ private: alignas(16) GLvec4 tev_combiner_buffer_color; }; - static_assert(sizeof(UniformData) == 0x3C0, "The size of the UniformData structure has changed, update the structure in the shader"); - static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); + static_assert( + sizeof(UniformData) == 0x3C0, + "The size of the UniformData structure has changed, update the structure in the shader"); + static_assert(sizeof(UniformData) < 16384, + "UniformData structure must be less than 16kb as per the OpenGL spec"); /// Sets the OpenGL shader in accordance with the current PICA register state void SetShader(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 7efd0038a..5cbad9b43 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -9,18 +9,14 @@ #include <unordered_set> #include <utility> #include <vector> - #include <glad/glad.h> - #include "common/bit_field.h" #include "common/emu_window.h" #include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" #include "common/vector_math.h" - #include "core/memory.h" - #include "video_core/debug_utils/debug_utils.h" #include "video_core/pica_state.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" @@ -35,18 +31,18 @@ struct FormatTuple { }; static const std::array<FormatTuple, 5> fb_format_tuples = {{ - { GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8 }, // RGBA8 - { GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE }, // RGB8 - { GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1 }, // RGB5A1 - { GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5 }, // RGB565 - { GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4 }, // RGBA4 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8 + {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8 + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 }}; static const std::array<FormatTuple, 4> depth_format_tuples = {{ - { GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT }, // D16 + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 {}, - { GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT }, // D24 - { GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8 }, // D24S8 + {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8 }}; RasterizerCacheOpenGL::RasterizerCacheOpenGL() { @@ -58,7 +54,9 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { FlushAll(); } -static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data, bool morton_to_gl) { +static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, + u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, + u8* gl_data, bool morton_to_gl) { using PixelFormat = CachedSurface::PixelFormat; u8* data_ptrs[2]; @@ -72,7 +70,8 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, for (unsigned y = 0; y < height; ++y) { for (unsigned x = 0; x < width; ++x) { const u32 coarse_y = y & ~7; - u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; + u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + + coarse_y * width * bytes_per_pixel; u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; data_ptrs[morton_to_gl] = morton_data + morton_offset; @@ -81,7 +80,8 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, // Swap depth and stencil value ordering since 3DS does not match OpenGL u32 depth_stencil; memcpy(&depth_stencil, data_ptrs[1], sizeof(u32)); - depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | (depth_stencil >> depth_stencil_shifts[1]); + depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | + (depth_stencil >> depth_stencil_shifts[1]); memcpy(data_ptrs[0], &depth_stencil, sizeof(u32)); } @@ -90,7 +90,8 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, for (unsigned y = 0; y < height; ++y) { for (unsigned x = 0; x < width; ++x) { const u32 coarse_y = y & ~7; - u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; + u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + + coarse_y * width * bytes_per_pixel; u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; data_ptrs[morton_to_gl] = morton_data + morton_offset; @@ -102,17 +103,21 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, } } -bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect) { +bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, + CachedSurface::SurfaceType type, + const MathUtil::Rectangle<int>& src_rect, + const MathUtil::Rectangle<int>& dst_rect) { using SurfaceType = CachedSurface::SurfaceType; OpenGLState cur_state = OpenGLState::GetCurState(); - // Make sure textures aren't bound to texture units, since going to bind them to framebuffer components + // Make sure textures aren't bound to texture units, since going to bind them to framebuffer + // components OpenGLState::ResetTexture(src_tex); OpenGLState::ResetTexture(dst_tex); // Keep track of previous framebuffer bindings - GLuint old_fbs[2] = { cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer }; + GLuint old_fbs[2] = {cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer}; cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle; cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle; cur_state.Apply(); @@ -120,11 +125,15 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS u32 buffers = 0; if (type == SurfaceType::Color || type == SurfaceType::Texture) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, + 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, + 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); buffers = GL_COLOR_BUFFER_BIT; } else if (type == SurfaceType::Depth) { @@ -139,10 +148,12 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS buffers = GL_DEPTH_BUFFER_BIT; } else if (type == SurfaceType::DepthStencil) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + src_tex, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + dst_tex, 0); buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; } @@ -155,9 +166,9 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS return false; } - glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, - dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, - buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); + glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, + dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, + buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); // Restore previous framebuffer bindings cur_state.draw.read_framebuffer = old_fbs[0]; @@ -167,17 +178,24 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS return true; } -bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect) { +bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, + const MathUtil::Rectangle<int>& src_rect, + CachedSurface* dst_surface, + const MathUtil::Rectangle<int>& dst_rect) { using SurfaceType = CachedSurface::SurfaceType; - if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) { + if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, + dst_surface->pixel_format)) { return false; } - return BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect); + return BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, + CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, + dst_rect); } -static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, u32 width, u32 height) { +static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, + u32 width, u32 height) { // Allocate an uninitialized texture of appropriate size and format for the surface using SurfaceType = CachedSurface::SurfaceType; @@ -200,11 +218,11 @@ static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pi ASSERT(tuple_idx < depth_format_tuples.size()); tuple = depth_format_tuples[tuple_idx]; } else { - tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE }; + tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; } - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, - tuple.format, tuple.type, nullptr); + glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, tuple.format, + tuple.type, nullptr); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); @@ -217,7 +235,8 @@ static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pi } MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192)); -CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create) { +CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, + bool load_if_create) { using PixelFormat = CachedSurface::PixelFormat; using SurfaceType = CachedSurface::SurfaceType; @@ -225,29 +244,31 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo return nullptr; } - u32 params_size = params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8; + u32 params_size = + params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8; // Check for an exact match in existing surfaces CachedSurface* best_exact_surface = nullptr; float exact_surface_goodness = -1.f; - auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size); + auto surface_interval = + boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size); auto range = surface_cache.equal_range(surface_interval); for (auto it = range.first; it != range.second; ++it) { for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { CachedSurface* surface = it2->get(); // Check if the request matches the surface exactly - if (params.addr == surface->addr && - params.width == surface->width && params.height == surface->height && - params.pixel_format == surface->pixel_format) - { + if (params.addr == surface->addr && params.width == surface->width && + params.height == surface->height && params.pixel_format == surface->pixel_format) { // Make sure optional param-matching criteria are fulfilled bool tiling_match = (params.is_tiled == surface->is_tiled); - bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height); + bool res_scale_match = (params.res_scale_width == surface->res_scale_width && + params.res_scale_height == surface->res_scale_height); if (!match_res_scale || res_scale_match) { // Prioritize same-tiling and highest resolution surfaces - float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height; + float match_goodness = + (float)tiling_match + surface->res_scale_width * surface->res_scale_height; if (match_goodness > exact_surface_goodness || surface->dirty) { exact_surface_goodness = match_goodness; best_exact_surface = surface; @@ -288,9 +309,11 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo if (!load_if_create) { // Don't load any data; just allocate the surface's texture - AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); + AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, + new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); } else { - // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead of memory upload below if that's a common scenario in some game + // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead + // of memory upload below if that's a common scenario in some game Memory::RasterizerFlushRegion(params.addr, params_size); @@ -318,7 +341,7 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo tuple = fb_format_tuples[(unsigned int)params.pixel_format]; } else { // Texture - tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE }; + tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; } std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height); @@ -326,19 +349,23 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo Pica::DebugUtils::TextureInfo tex_info; tex_info.width = params.width; tex_info.height = params.height; - tex_info.stride = params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8; + tex_info.stride = + params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8; tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format; tex_info.physical_address = params.addr; for (unsigned y = 0; y < params.height; ++y) { for (unsigned x = 0; x < params.width; ++x) { - tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, params.height - 1 - y, tex_info); + tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture( + texture_src_data, x, params.height - 1 - y, tex_info); } } - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data()); + glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, + 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data()); } else { - // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format + // Depth/Stencil formats need special treatment since they aren't sampleable using + // LookupTexture and can't use RGBA format size_t tuple_idx = (size_t)params.pixel_format - 14; ASSERT(tuple_idx < depth_format_tuples.size()); const FormatTuple& tuple = depth_format_tuples[tuple_idx]; @@ -350,14 +377,18 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; - std::vector<u8> temp_fb_depth_buffer(params.width * params.height * gl_bytes_per_pixel); + std::vector<u8> temp_fb_depth_buffer(params.width * params.height * + gl_bytes_per_pixel); - u8* temp_fb_depth_buffer_ptr = use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data(); + u8* temp_fb_depth_buffer_ptr = + use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data(); - MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, true); + MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, + gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, + true); - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, - tuple.format, tuple.type, temp_fb_depth_buffer.data()); + glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, + 0, tuple.format, tuple.type, temp_fb_depth_buffer.data()); } } glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); @@ -367,10 +398,13 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo OGLTexture scaled_texture; scaled_texture.Create(); - AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); - BlitTextures(new_surface->texture.handle, scaled_texture.handle, CachedSurface::GetFormatType(new_surface->pixel_format), - MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height), - MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(), new_surface->GetScaledHeight())); + AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, + new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); + BlitTextures(new_surface->texture.handle, scaled_texture.handle, + CachedSurface::GetFormatType(new_surface->pixel_format), + MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height), + MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(), + new_surface->GetScaledHeight())); new_surface->texture.Release(); new_surface->texture.handle = scaled_texture.handle; @@ -389,11 +423,15 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo } Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1); - surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open(new_surface->addr, new_surface->addr + new_surface->size), std::set<std::shared_ptr<CachedSurface>>({ new_surface }))); + surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open( + new_surface->addr, new_surface->addr + new_surface->size), + std::set<std::shared_ptr<CachedSurface>>({new_surface}))); return new_surface.get(); } -CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect) { +CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, + bool match_res_scale, bool load_if_create, + MathUtil::Rectangle<int>& out_rect) { if (params.addr == 0) { return nullptr; } @@ -405,7 +443,8 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params CachedSurface* best_subrect_surface = nullptr; float subrect_surface_goodness = -1.f; - auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size); + auto surface_interval = + boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size); auto cache_upper_bound = surface_cache.upper_bound(surface_interval); for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { @@ -414,14 +453,15 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params // Check if the request is contained in the surface if (params.addr >= surface->addr && params.addr + params_size - 1 <= surface->addr + surface->size - 1 && - params.pixel_format == surface->pixel_format) - { + params.pixel_format == surface->pixel_format) { // Make sure optional param-matching criteria are fulfilled bool tiling_match = (params.is_tiled == surface->is_tiled); - bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height); + bool res_scale_match = (params.res_scale_width == surface->res_scale_width && + params.res_scale_height == surface->res_scale_height); if (!match_res_scale || res_scale_match) { // Prioritize same-tiling and highest resolution surfaces - float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height; + float match_goodness = + (float)tiling_match + surface->res_scale_width * surface->res_scale_height; if (match_goodness > subrect_surface_goodness || surface->dirty) { subrect_surface_goodness = match_goodness; best_subrect_surface = surface; @@ -433,7 +473,8 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params // Return the best subrect surface if found if (best_subrect_surface != nullptr) { - unsigned int bytes_per_pixel = (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8); + unsigned int bytes_per_pixel = + (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8); int x0, y0; @@ -452,7 +493,9 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params y0 = begin_tile_index / tiles_per_row * 8; // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory. - out_rect = MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width, best_subrect_surface->height - (y0 + params.height)); + out_rect = + MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width, + best_subrect_surface->height - (y0 + params.height)); } out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width); @@ -465,16 +508,20 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params // No subrect found - create and return a new surface if (!params.is_tiled) { - out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width), (int)(params.height * params.res_scale_height)); + out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width), + (int)(params.height * params.res_scale_height)); } else { - out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height), (int)(params.width * params.res_scale_width), 0); + out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height), + (int)(params.width * params.res_scale_width), 0); } return GetSurface(params, match_res_scale, load_if_create); } -CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTextureConfig& config) { - Pica::DebugUtils::TextureInfo info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format); +CachedSurface* RasterizerCacheOpenGL::GetTextureSurface( + const Pica::Regs::FullTextureConfig& config) { + Pica::DebugUtils::TextureInfo info = + Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format); CachedSurface params; params.addr = info.physical_address; @@ -485,20 +532,28 @@ CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTe return GetSurface(params, false, true); } -std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) { +std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> +RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) { const auto& regs = Pica::g_state.regs; // Make sur that framebuffers don't overlap if both color and depth are being used u32 fb_area = config.GetWidth() * config.GetHeight(); - bool framebuffers_overlap = config.GetColorBufferPhysicalAddress() != 0 && - config.GetDepthBufferPhysicalAddress() != 0 && - MathUtil::IntervalsIntersect(config.GetColorBufferPhysicalAddress(), fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())), - config.GetDepthBufferPhysicalAddress(), fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format)); + bool framebuffers_overlap = + config.GetColorBufferPhysicalAddress() != 0 && + config.GetDepthBufferPhysicalAddress() != 0 && + MathUtil::IntervalsIntersect( + config.GetColorBufferPhysicalAddress(), + fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())), + config.GetDepthBufferPhysicalAddress(), + fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format)); bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0; - bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && (regs.output_merger.depth_test_enable || regs.output_merger.depth_write_enable || !framebuffers_overlap); + bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && + (regs.output_merger.depth_test_enable || + regs.output_merger.depth_write_enable || !framebuffers_overlap); if (framebuffers_overlap && using_color_fb && using_depth_fb) { - LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!"); + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " + "overlapping framebuffers not supported!"); using_depth_fb = false; } @@ -512,8 +567,10 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC auto layout = VideoCore::g_emu_window->GetFramebufferLayout(); // Assume same scaling factor for top and bottom screens - color_params.res_scale_width = depth_params.res_scale_width = (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth; - color_params.res_scale_height = depth_params.res_scale_height = (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight; + color_params.res_scale_width = depth_params.res_scale_width = + (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth; + color_params.res_scale_height = depth_params.res_scale_height = + (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight; } color_params.addr = config.GetColorBufferPhysicalAddress(); @@ -523,22 +580,28 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format); MathUtil::Rectangle<int> color_rect; - CachedSurface* color_surface = using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr; + CachedSurface* color_surface = + using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr; MathUtil::Rectangle<int> depth_rect; - CachedSurface* depth_surface = using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr; + CachedSurface* depth_surface = + using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr; // Sanity check to make sure found surfaces aren't the same if (using_depth_fb && using_color_fb && color_surface == depth_surface) { - LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!"); + LOG_CRITICAL( + Render_OpenGL, + "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!"); using_depth_fb = false; depth_surface = nullptr; } MathUtil::Rectangle<int> rect; - if (color_surface != nullptr && depth_surface != nullptr && (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) { - // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if they don't match + if (color_surface != nullptr && depth_surface != nullptr && + (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) { + // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if + // they don't match if (color_rect.left != 0 || color_rect.top != 0) { color_surface = GetSurface(color_params, true, true); } @@ -548,9 +611,13 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC } if (!color_surface->is_tiled) { - rect = MathUtil::Rectangle<int>(0, 0, (int)(color_params.width * color_params.res_scale_width), (int)(color_params.height * color_params.res_scale_height)); + rect = MathUtil::Rectangle<int>( + 0, 0, (int)(color_params.width * color_params.res_scale_width), + (int)(color_params.height * color_params.res_scale_height)); } else { - rect = MathUtil::Rectangle<int>(0, (int)(color_params.height * color_params.res_scale_height), (int)(color_params.width * color_params.res_scale_width), 0); + rect = MathUtil::Rectangle<int>( + 0, (int)(color_params.height * color_params.res_scale_height), + (int)(color_params.width * color_params.res_scale_width), 0); } } else if (color_surface != nullptr) { rect = color_rect; @@ -564,7 +631,8 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC } CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) { - auto surface_interval = boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress()); + auto surface_interval = + boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress()); auto range = surface_cache.equal_range(surface_interval); for (auto it = range.first; it != range.second; ++it) { for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { @@ -581,8 +649,9 @@ CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryF if (surface->addr == config.GetStartAddress() && CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value && - (surface->width * surface->height * CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == (config.GetEndAddress() - config.GetStartAddress())) - { + (surface->width * surface->height * + CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == + (config.GetEndAddress() - config.GetStartAddress())) { return surface; } } @@ -617,8 +686,11 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) { unscaled_tex.Create(); - AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, surface->height); - BlitTextures(surface->texture.handle, unscaled_tex.handle, CachedSurface::GetFormatType(surface->pixel_format), + AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, + surface->height); + BlitTextures( + surface->texture.handle, unscaled_tex.handle, + CachedSurface::GetFormatType(surface->pixel_format), MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()), MathUtil::Rectangle<int>(0, 0, surface->width, surface->height)); @@ -648,10 +720,14 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); - // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary. - MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), false); + // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion + // is necessary. + MortonCopyPixels(surface->pixel_format, surface->width, surface->height, + bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), + false); } else { - // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format + // Depth/Stencil formats need special treatment since they aren't sampleable using + // LookupTexture and can't use RGBA format size_t tuple_idx = (size_t)surface->pixel_format - 14; ASSERT(tuple_idx < depth_format_tuples.size()); const FormatTuple& tuple = depth_format_tuples[tuple_idx]; @@ -669,7 +745,9 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data(); - MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, false); + MortonCopyPixels(surface->pixel_format, surface->width, surface->height, + bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, + false); } } glPixelStorei(GL_PACK_ROW_LENGTH, 0); @@ -680,7 +758,8 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { cur_state.Apply(); } -void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate) { +void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, + bool invalidate) { if (size == 0) { return; } @@ -691,8 +770,11 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurfac auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size); auto cache_upper_bound = surface_cache.upper_bound(surface_interval); for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { - std::copy_if(it->second.begin(), it->second.end(), std::inserter(touching_surfaces, touching_surfaces.end()), - [skip_surface](std::shared_ptr<CachedSurface> surface) { return (surface.get() != skip_surface); }); + std::copy_if(it->second.begin(), it->second.end(), + std::inserter(touching_surfaces, touching_surfaces.end()), + [skip_surface](std::shared_ptr<CachedSurface> surface) { + return (surface.get() != skip_surface); + }); } // Flush and invalidate surfaces @@ -700,7 +782,10 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurfac FlushSurface(surface.get()); if (invalidate) { Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1); - surface_cache.subtract(std::make_pair(boost::icl::interval<PAddr>::right_open(surface->addr, surface->addr + surface->size), std::set<std::shared_ptr<CachedSurface>>({ surface }))); + surface_cache.subtract( + std::make_pair(boost::icl::interval<PAddr>::right_open( + surface->addr, surface->addr + surface->size), + std::set<std::shared_ptr<CachedSurface>>({surface}))); } } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 225596415..849530d86 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -8,21 +8,18 @@ #include <memory> #include <set> #include <tuple> - #include <boost/icl/interval_map.hpp> #include <glad/glad.h> - #include "common/assert.h" #include "common/common_funcs.h" #include "common/common_types.h" - #include "core/hw/gpu.h" - #include "video_core/pica.h" #include "video_core/renderer_opengl/gl_resource_manager.h" namespace MathUtil { -template <class T> struct Rectangle; +template <class T> +struct Rectangle; } struct CachedSurface; @@ -32,38 +29,38 @@ using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<Ca struct CachedSurface { enum class PixelFormat { // First 5 formats are shared between textures and color buffers - RGBA8 = 0, - RGB8 = 1, - RGB5A1 = 2, - RGB565 = 3, - RGBA4 = 4, + RGBA8 = 0, + RGB8 = 1, + RGB5A1 = 2, + RGB565 = 3, + RGBA4 = 4, // Texture-only formats - IA8 = 5, - RG8 = 6, - I8 = 7, - A8 = 8, - IA4 = 9, - I4 = 10, - A4 = 11, - ETC1 = 12, - ETC1A4 = 13, + IA8 = 5, + RG8 = 6, + I8 = 7, + A8 = 8, + IA4 = 9, + I4 = 10, + A4 = 11, + ETC1 = 12, + ETC1A4 = 13, // Depth buffer-only formats - D16 = 14, + D16 = 14, // gap - D24 = 16, - D24S8 = 17, + D24 = 16, + D24S8 = 17, - Invalid = 255, + Invalid = 255, }; enum class SurfaceType { - Color = 0, - Texture = 1, - Depth = 2, + Color = 0, + Texture = 1, + Depth = 2, DepthStencil = 3, - Invalid = 4, + Invalid = 4, }; static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { @@ -101,7 +98,8 @@ struct CachedSurface { } static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) { - return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) : PixelFormat::Invalid; + return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) + : PixelFormat::Invalid; } static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) { @@ -120,7 +118,8 @@ struct CachedSurface { SurfaceType a_type = GetFormatType(pixel_format_a); SurfaceType b_type = GetFormatType(pixel_format_b); - if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) { + if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && + (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) { return true; } @@ -187,22 +186,30 @@ public: ~RasterizerCacheOpenGL(); /// Blits one texture to another - bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect); + bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, + const MathUtil::Rectangle<int>& src_rect, + const MathUtil::Rectangle<int>& dst_rect); /// Attempt to blit one surface's texture to another - bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect); + bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, + CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect); /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) - CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create); + CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, + bool load_if_create); - /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from 3DS memory to OpenGL and caches it (if not already cached) - CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect); + /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from + /// 3DS memory to OpenGL and caches it (if not already cached) + CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, + bool load_if_create, MathUtil::Rectangle<int>& out_rect); /// Gets a surface based on the texture configuration CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config); - /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer configuration - std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config); + /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer + /// configuration + std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces( + const Pica::Regs::FramebufferConfig& config); /// Attempt to get a surface that exactly matches the fill region and format CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config); @@ -210,7 +217,8 @@ public: /// Write the surface back to memory void FlushSurface(CachedSurface* surface); - /// Write any cached resources overlapping the region back to memory (if dirty) and optionally invalidate them in the cache + /// Write any cached resources overlapping the region back to memory (if dirty) and optionally + /// invalidate them in the cache void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate); /// Flush all cached resources tracked by this cache manager diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index eb128966c..13301ec9f 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -5,30 +5,36 @@ #pragma once #include <utility> - #include <glad/glad.h> - #include "common/common_types.h" - #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state.h" class OGLTexture : private NonCopyable { public: OGLTexture() = default; - OGLTexture(OGLTexture&& o) { std::swap(handle, o.handle); } - ~OGLTexture() { Release(); } - OGLTexture& operator=(OGLTexture&& o) { std::swap(handle, o.handle); return *this; } + OGLTexture(OGLTexture&& o) { + std::swap(handle, o.handle); + } + ~OGLTexture() { + Release(); + } + OGLTexture& operator=(OGLTexture&& o) { + std::swap(handle, o.handle); + return *this; + } /// Creates a new internal OpenGL resource and stores the handle void Create() { - if (handle != 0) return; + if (handle != 0) + return; glGenTextures(1, &handle); } /// Deletes the internal OpenGL resource void Release() { - if (handle == 0) return; + if (handle == 0) + return; glDeleteTextures(1, &handle); OpenGLState::ResetTexture(handle); handle = 0; @@ -40,19 +46,28 @@ public: class OGLSampler : private NonCopyable { public: OGLSampler() = default; - OGLSampler(OGLSampler&& o) { std::swap(handle, o.handle); } - ~OGLSampler() { Release(); } - OGLSampler& operator=(OGLSampler&& o) { std::swap(handle, o.handle); return *this; } + OGLSampler(OGLSampler&& o) { + std::swap(handle, o.handle); + } + ~OGLSampler() { + Release(); + } + OGLSampler& operator=(OGLSampler&& o) { + std::swap(handle, o.handle); + return *this; + } /// Creates a new internal OpenGL resource and stores the handle void Create() { - if (handle != 0) return; + if (handle != 0) + return; glGenSamplers(1, &handle); } /// Deletes the internal OpenGL resource void Release() { - if (handle == 0) return; + if (handle == 0) + return; glDeleteSamplers(1, &handle); OpenGLState::ResetSampler(handle); handle = 0; @@ -64,19 +79,28 @@ public: class OGLShader : private NonCopyable { public: OGLShader() = default; - OGLShader(OGLShader&& o) { std::swap(handle, o.handle); } - ~OGLShader() { Release(); } - OGLShader& operator=(OGLShader&& o) { std::swap(handle, o.handle); return *this; } + OGLShader(OGLShader&& o) { + std::swap(handle, o.handle); + } + ~OGLShader() { + Release(); + } + OGLShader& operator=(OGLShader&& o) { + std::swap(handle, o.handle); + return *this; + } /// Creates a new internal OpenGL resource and stores the handle void Create(const char* vert_shader, const char* frag_shader) { - if (handle != 0) return; + if (handle != 0) + return; handle = GLShader::LoadProgram(vert_shader, frag_shader); } /// Deletes the internal OpenGL resource void Release() { - if (handle == 0) return; + if (handle == 0) + return; glDeleteProgram(handle); OpenGLState::ResetProgram(handle); handle = 0; @@ -88,19 +112,28 @@ public: class OGLBuffer : private NonCopyable { public: OGLBuffer() = default; - OGLBuffer(OGLBuffer&& o) { std::swap(handle, o.handle); } - ~OGLBuffer() { Release(); } - OGLBuffer& operator=(OGLBuffer&& o) { std::swap(handle, o.handle); return *this; } + OGLBuffer(OGLBuffer&& o) { + std::swap(handle, o.handle); + } + ~OGLBuffer() { + Release(); + } + OGLBuffer& operator=(OGLBuffer&& o) { + std::swap(handle, o.handle); + return *this; + } /// Creates a new internal OpenGL resource and stores the handle void Create() { - if (handle != 0) return; + if (handle != 0) + return; glGenBuffers(1, &handle); } /// Deletes the internal OpenGL resource void Release() { - if (handle == 0) return; + if (handle == 0) + return; glDeleteBuffers(1, &handle); OpenGLState::ResetBuffer(handle); handle = 0; @@ -112,19 +145,28 @@ public: class OGLVertexArray : private NonCopyable { public: OGLVertexArray() = default; - OGLVertexArray(OGLVertexArray&& o) { std::swap(handle, o.handle); } - ~OGLVertexArray() { Release(); } - OGLVertexArray& operator=(OGLVertexArray&& o) { std::swap(handle, o.handle); return *this; } + OGLVertexArray(OGLVertexArray&& o) { + std::swap(handle, o.handle); + } + ~OGLVertexArray() { + Release(); + } + OGLVertexArray& operator=(OGLVertexArray&& o) { + std::swap(handle, o.handle); + return *this; + } /// Creates a new internal OpenGL resource and stores the handle void Create() { - if (handle != 0) return; + if (handle != 0) + return; glGenVertexArrays(1, &handle); } /// Deletes the internal OpenGL resource void Release() { - if (handle == 0) return; + if (handle == 0) + return; glDeleteVertexArrays(1, &handle); OpenGLState::ResetVertexArray(handle); handle = 0; @@ -136,19 +178,28 @@ public: class OGLFramebuffer : private NonCopyable { public: OGLFramebuffer() = default; - OGLFramebuffer(OGLFramebuffer&& o) { std::swap(handle, o.handle); } - ~OGLFramebuffer() { Release(); } - OGLFramebuffer& operator=(OGLFramebuffer&& o) { std::swap(handle, o.handle); return *this; } + OGLFramebuffer(OGLFramebuffer&& o) { + std::swap(handle, o.handle); + } + ~OGLFramebuffer() { + Release(); + } + OGLFramebuffer& operator=(OGLFramebuffer&& o) { + std::swap(handle, o.handle); + return *this; + } /// Creates a new internal OpenGL resource and stores the handle void Create() { - if (handle != 0) return; + if (handle != 0) + return; glGenFramebuffers(1, &handle); } /// Deletes the internal OpenGL resource void Release() { - if (handle == 0) return; + if (handle == 0) + return; glDeleteFramebuffers(1, &handle); OpenGLState::ResetFramebuffer(handle); handle = 0; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 3de372f67..1808ee0a9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -4,11 +4,9 @@ #include <array> #include <cstddef> - #include "common/assert.h" #include "common/bit_field.h" #include "common/logging/log.h" - #include "video_core/pica.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_gen.h" @@ -21,19 +19,18 @@ namespace GLShader { /// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code) static bool IsPassThroughTevStage(const TevStageConfig& stage) { - return (stage.color_op == TevStageConfig::Operation::Replace && - stage.alpha_op == TevStageConfig::Operation::Replace && - stage.color_source1 == TevStageConfig::Source::Previous && - stage.alpha_source1 == TevStageConfig::Source::Previous && - stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && - stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && - stage.GetColorMultiplier() == 1 && - stage.GetAlphaMultiplier() == 1); + return (stage.color_op == TevStageConfig::Operation::Replace && + stage.alpha_op == TevStageConfig::Operation::Replace && + stage.color_source1 == TevStageConfig::Source::Previous && + stage.alpha_source1 == TevStageConfig::Source::Previous && + stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && + stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && + stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); } /// Writes the specified TEV stage source component(s) -static void AppendSource(std::string& out, const PicaShaderConfig& config, TevStageConfig::Source source, - const std::string& index_name) { +static void AppendSource(std::string& out, const PicaShaderConfig& config, + TevStageConfig::Source source, const std::string& index_name) { const auto& state = config.state; using Source = TevStageConfig::Source; switch (source) { @@ -48,7 +45,7 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config, TevSt break; case Source::Texture0: // Only unit 0 respects the texturing type (according to 3DBrew) - switch(state.texture0_type) { + switch (state.texture0_type) { case Pica::Regs::TextureConfig::Texture2D: out += "texture(tex[0], texcoord[0])"; break; @@ -57,7 +54,8 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config, TevSt break; default: out += "texture(tex[0], texcoord[0])"; - LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", static_cast<int>(state.texture0_type)); + LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", + static_cast<int>(state.texture0_type)); UNIMPLEMENTED(); break; } @@ -85,8 +83,9 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config, TevSt } /// Writes the color components to use for the specified TEV stage color modifier -static void AppendColorModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::ColorModifier modifier, - TevStageConfig::Source source, const std::string& index_name) { +static void AppendColorModifier(std::string& out, const PicaShaderConfig& config, + TevStageConfig::ColorModifier modifier, + TevStageConfig::Source source, const std::string& index_name) { using ColorModifier = TevStageConfig::ColorModifier; switch (modifier) { case ColorModifier::SourceColor: @@ -142,8 +141,9 @@ static void AppendColorModifier(std::string& out, const PicaShaderConfig& config } /// Writes the alpha component to use for the specified TEV stage alpha modifier -static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::AlphaModifier modifier, - TevStageConfig::Source source, const std::string& index_name) { +static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config, + TevStageConfig::AlphaModifier modifier, + TevStageConfig::Source source, const std::string& index_name) { using AlphaModifier = TevStageConfig::AlphaModifier; switch (modifier) { case AlphaModifier::SourceAlpha: @@ -191,7 +191,7 @@ static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config /// Writes the combiner function for the color components for the specified TEV stage operation static void AppendColorCombiner(std::string& out, TevStageConfig::Operation operation, - const std::string& variable_name) { + const std::string& variable_name) { out += "clamp("; using Operation = TevStageConfig::Operation; switch (operation) { @@ -208,8 +208,10 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper out += variable_name + "[0] + " + variable_name + "[1] - vec3(0.5)"; break; case Operation::Lerp: - // TODO(bunnei): Verify if HW actually does this per-component, otherwise we can just use builtin lerp - out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name + "[1] * (vec3(1.0) - " + variable_name + "[2])"; + // TODO(bunnei): Verify if HW actually does this per-component, otherwise we can just use + // builtin lerp + out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name + + "[1] * (vec3(1.0) - " + variable_name + "[2])"; break; case Operation::Subtract: out += variable_name + "[0] - " + variable_name + "[1]"; @@ -218,10 +220,12 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper out += variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2]"; break; case Operation::AddThenMultiply: - out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]"; + out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + + variable_name + "[2]"; break; case Operation::Dot3_RGB: - out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name + "[1] - vec3(0.5)) * 4.0)"; + out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name + + "[1] - vec3(0.5)) * 4.0)"; break; default: out += "vec3(0.0)"; @@ -233,7 +237,7 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper /// Writes the combiner function for the alpha component for the specified TEV stage operation static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation operation, - const std::string& variable_name) { + const std::string& variable_name) { out += "clamp("; using Operation = TevStageConfig::Operation; switch (operation) { @@ -250,7 +254,8 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper out += variable_name + "[0] + " + variable_name + "[1] - 0.5"; break; case Operation::Lerp: - out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name + "[1] * (1.0 - " + variable_name + "[2])"; + out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name + + "[1] * (1.0 - " + variable_name + "[2])"; break; case Operation::Subtract: out += variable_name + "[0] - " + variable_name + "[1]"; @@ -259,7 +264,8 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper out += variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2]"; break; case Operation::AddThenMultiply: - out += "min(" + variable_name + "[0] + " + variable_name + "[1], 1.0) * " + variable_name + "[2]"; + out += "min(" + variable_name + "[0] + " + variable_name + "[1], 1.0) * " + variable_name + + "[2]"; break; default: out += "0.0"; @@ -284,9 +290,8 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) { case CompareFunc::LessThan: case CompareFunc::LessThanOrEqual: case CompareFunc::GreaterThan: - case CompareFunc::GreaterThanOrEqual: - { - static const char* op[] = { "!=", "==", ">=", ">", "<=", "<", }; + case CompareFunc::GreaterThanOrEqual: { + static const char* op[] = {"!=", "==", ">=", ">", "<=", "<"}; unsigned index = (unsigned)func - (unsigned)CompareFunc::Equal; out += "int(last_tex_env_out.a * 255.0f) " + std::string(op[index]) + " alphatest_ref"; break; @@ -301,7 +306,8 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) { /// Writes the code to emulate the specified TEV stage static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { - const auto stage = static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]); + const auto stage = + static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]); if (!IsPassThroughTevStage(stage)) { std::string index_name = std::to_string(index); @@ -330,8 +336,12 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi out += ";\n"; out += "last_tex_env_out = vec4(" - "clamp(color_output_" + index_name + " * " + std::to_string(stage.GetColorMultiplier()) + ".0, vec3(0.0), vec3(1.0))," - "clamp(alpha_output_" + index_name + " * " + std::to_string(stage.GetAlphaMultiplier()) + ".0, 0.0, 1.0));\n"; + "clamp(color_output_" + + index_name + " * " + std::to_string(stage.GetColorMultiplier()) + + ".0, vec3(0.0), vec3(1.0))," + "clamp(alpha_output_" + + index_name + " * " + std::to_string(stage.GetAlphaMultiplier()) + + ".0, 0.0, 1.0));\n"; } out += "combiner_buffer = next_combiner_buffer;\n"; @@ -355,13 +365,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { // Compute fragment normals if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { - // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture + // Bump mapping is enabled using a normal map, read perturbation vector from the selected + // texture std::string bump_selector = std::to_string(lighting.bump_selector); - out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; + out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + + bump_selector + "]).rgb - 1.0;\n"; - // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result + // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher + // precision result if (lighting.bump_renorm) { - std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; + std::string val = + "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; } } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { @@ -373,7 +387,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; } - // Rotate the surface-local normal by the interpolated normal quaternion to convert it to eyespace + // Rotate the surface-local normal by the interpolated normal quaternion to convert it to + // eyespace out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; // Gets the index into the specified lookup table for specular lighting @@ -406,12 +421,14 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { if (abs) { // LUT index is in the range of (0.0, 1.0) - index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; + index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" + : "max(" + index + ", 0.f)"; return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; } else { // LUT index is in the range of (-1.0, 1.0) index = "clamp(" + index + ", -1.0, 1.0)"; - return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0)"; + return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index + + ") / 2.0)"; } return std::string(); @@ -434,52 +451,74 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { else out += "light_vector = normalize(" + light_src + ".position + view);\n"; - // Compute dot product of light_vector and normal, adjust if lighting is one-sided or two-sided - std::string dot_product = light_config.two_sided_diffuse ? "abs(dot(light_vector, normal))" : "max(dot(light_vector, normal), 0.0)"; + // Compute dot product of light_vector and normal, adjust if lighting is one-sided or + // two-sided + std::string dot_product = light_config.two_sided_diffuse + ? "abs(dot(light_vector, normal))" + : "max(dot(light_vector, normal), 0.0)"; // If enabled, compute distance attenuation value std::string dist_atten = "1.0"; if (light_config.dist_atten_enable) { - std::string index = "(" + light_src + ".dist_atten_scale * length(-view - " + light_src + ".position) + " + light_src + ".dist_atten_bias)"; + std::string index = "(" + light_src + ".dist_atten_scale * length(-view - " + + light_src + ".position) + " + light_src + ".dist_atten_bias)"; index = "((clamp(" + index + ", 0.0, FLOAT_255)))"; - const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num); + const unsigned lut_num = + ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num); dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index); } // If enabled, clamp specular component if lighting result is negative - std::string clamp_highlights = lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; + std::string clamp_highlights = + lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; // Specular 0 component std::string d0_lut_value = "1.0"; - if (lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution0)) { + if (lighting.lut_d0.enable && + Pica::Regs::IsLightingSamplerSupported(lighting.config, + Pica::Regs::LightingSampler::Distribution0)) { // Lookup specular "distribution 0" LUT value - std::string index = GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input); - d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; + std::string index = + GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input); + d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; } std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; // If enabled, lookup ReflectRed value, otherwise, 1.0 is used - if (lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { - std::string index = GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input); - std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; + if (lighting.lut_rr.enable && + Pica::Regs::IsLightingSamplerSupported(lighting.config, + Pica::Regs::LightingSampler::ReflectRed)) { + std::string index = + GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input); + std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; out += "refl_value.r = " + value + ";\n"; } else { out += "refl_value.r = 1.0;\n"; } // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used - if (lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { - std::string index = GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input); - std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; + if (lighting.lut_rg.enable && + Pica::Regs::IsLightingSamplerSupported(lighting.config, + Pica::Regs::LightingSampler::ReflectGreen)) { + std::string index = + GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input); + std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; out += "refl_value.g = " + value + ";\n"; } else { out += "refl_value.g = refl_value.r;\n"; } // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used - if (lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { - std::string index = GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input); - std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; + if (lighting.lut_rb.enable && + Pica::Regs::IsLightingSamplerSupported(lighting.config, + Pica::Regs::LightingSampler::ReflectBlue)) { + std::string index = + GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input); + std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; out += "refl_value.b = " + value + ";\n"; } else { out += "refl_value.b = refl_value.r;\n"; @@ -487,18 +526,26 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { // Specular 1 component std::string d1_lut_value = "1.0"; - if (lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution1)) { + if (lighting.lut_d1.enable && + Pica::Regs::IsLightingSamplerSupported(lighting.config, + Pica::Regs::LightingSampler::Distribution1)) { // Lookup specular "distribution 1" LUT value - std::string index = GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input); - d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; + std::string index = + GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input); + d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; } - std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; + std::string specular_1 = + "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; // Fresnel - if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Fresnel)) { + if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported( + lighting.config, Pica::Regs::LightingSampler::Fresnel)) { // Lookup fresnel LUT value - std::string index = GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input); - std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; + std::string index = + GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input); + std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; // Enabled for difffuse lighting alpha component if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || @@ -512,10 +559,12 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { } // Compute primary fragment color (diffuse lighting) function - out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n"; + out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " + + light_src + ".ambient) * " + dist_atten + ";\n"; // Compute secondary fragment color (specular lighting) function - out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n"; + out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " + + clamp_highlights + " * " + dist_atten + ";\n"; } // Sum final lighting result @@ -598,9 +647,9 @@ vec4 secondary_fragment_color = vec4(0.0); out += "!"; // x2,y2 have +1 added to cover the entire pixel area out += "(gl_FragCoord.x >= scissor_x1 * framebuffer_scale.x && " - "gl_FragCoord.y >= scissor_y1 * framebuffer_scale.y && " - "gl_FragCoord.x < (scissor_x2 + 1) * framebuffer_scale.x && " - "gl_FragCoord.y < (scissor_y2 + 1) * framebuffer_scale.y)) discard;\n"; + "gl_FragCoord.y >= scissor_y1 * framebuffer_scale.y && " + "gl_FragCoord.x < (scissor_x2 + 1) * framebuffer_scale.x && " + "gl_FragCoord.y < (scissor_y2 + 1) * framebuffer_scale.y)) discard;\n"; } out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n"; @@ -638,9 +687,11 @@ vec4 secondary_fragment_color = vec4(0.0); out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"; out += "float fog_f = fog_index - fog_i;\n"; out += "uint fog_lut_entry = texelFetch(fog_lut, int(fog_i), 0).r;\n"; - out += "float fog_lut_entry_difference = float(int((fog_lut_entry & 0x1FFFU) << 19U) >> 19);\n"; // Extract signed difference + out += "float fog_lut_entry_difference = float(int((fog_lut_entry & 0x1FFFU) << 19U) >> " + "19);\n"; // Extract signed difference out += "float fog_lut_entry_value = float((fog_lut_entry >> 13U) & 0x7FFU);\n"; - out += "float fog_factor = (fog_lut_entry_value + fog_lut_entry_difference * fog_f) / 2047.0;\n"; + out += "float fog_factor = (fog_lut_entry_value + fog_lut_entry_difference * fog_f) / " + "2047.0;\n"; out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; // Blend the fog @@ -658,14 +709,20 @@ vec4 secondary_fragment_color = vec4(0.0); std::string GenerateVertexShader() { std::string out = "#version 330 core\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) + ") in float vert_texcoord0_w;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + + ") in vec4 vert_position;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + + ") in vec2 vert_texcoord0;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + + ") in vec2 vert_texcoord1;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + + ") in vec2 vert_texcoord2;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) + + ") in float vert_texcoord0_w;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + + ") in vec4 vert_normquat;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; out += R"( out vec4 primary_color; diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index dded3db46..fe07aa6eb 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -3,9 +3,7 @@ // Refer to the license.txt file included. #include <vector> - #include <glad/glad.h> - #include "common/logging/log.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -56,7 +54,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) { if (result) { LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); } else { - LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s", &fragment_shader_error[0]); + LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s", + &fragment_shader_error[0]); } } diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 13ee986b9..ed84cadea 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -3,10 +3,8 @@ // Refer to the license.txt file included. #include <glad/glad.h> - #include "common/common_funcs.h" #include "common/logging/log.h" - #include "video_core/renderer_opengl/gl_state.h" OpenGLState OpenGLState::cur_state; @@ -106,11 +104,11 @@ void OpenGLState::Apply() const { // Color mask if (color_mask.red_enabled != cur_state.color_mask.red_enabled || - color_mask.green_enabled != cur_state.color_mask.green_enabled || - color_mask.blue_enabled != cur_state.color_mask.blue_enabled || - color_mask.alpha_enabled != cur_state.color_mask.alpha_enabled) { - glColorMask(color_mask.red_enabled, color_mask.green_enabled, - color_mask.blue_enabled, color_mask.alpha_enabled); + color_mask.green_enabled != cur_state.color_mask.green_enabled || + color_mask.blue_enabled != cur_state.color_mask.blue_enabled || + color_mask.alpha_enabled != cur_state.color_mask.alpha_enabled) { + glColorMask(color_mask.red_enabled, color_mask.green_enabled, color_mask.blue_enabled, + color_mask.alpha_enabled); } // Stencil test @@ -123,15 +121,16 @@ void OpenGLState::Apply() const { } if (stencil.test_func != cur_state.stencil.test_func || - stencil.test_ref != cur_state.stencil.test_ref || - stencil.test_mask != cur_state.stencil.test_mask) { + stencil.test_ref != cur_state.stencil.test_ref || + stencil.test_mask != cur_state.stencil.test_mask) { glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask); } if (stencil.action_depth_fail != cur_state.stencil.action_depth_fail || - stencil.action_depth_pass != cur_state.stencil.action_depth_pass || - stencil.action_stencil_fail != cur_state.stencil.action_stencil_fail) { - glStencilOp(stencil.action_stencil_fail, stencil.action_depth_fail, stencil.action_depth_pass); + stencil.action_depth_pass != cur_state.stencil.action_depth_pass || + stencil.action_stencil_fail != cur_state.stencil.action_stencil_fail) { + glStencilOp(stencil.action_stencil_fail, stencil.action_depth_fail, + stencil.action_depth_pass); } // Stencil mask @@ -154,23 +153,22 @@ void OpenGLState::Apply() const { } if (blend.color.red != cur_state.blend.color.red || - blend.color.green != cur_state.blend.color.green || - blend.color.blue != cur_state.blend.color.blue || - blend.color.alpha != cur_state.blend.color.alpha) { - glBlendColor(blend.color.red, blend.color.green, - blend.color.blue, blend.color.alpha); + blend.color.green != cur_state.blend.color.green || + blend.color.blue != cur_state.blend.color.blue || + blend.color.alpha != cur_state.blend.color.alpha) { + glBlendColor(blend.color.red, blend.color.green, blend.color.blue, blend.color.alpha); } if (blend.src_rgb_func != cur_state.blend.src_rgb_func || - blend.dst_rgb_func != cur_state.blend.dst_rgb_func || - blend.src_a_func != cur_state.blend.src_a_func || - blend.dst_a_func != cur_state.blend.dst_a_func) { - glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, - blend.src_a_func, blend.dst_a_func); + blend.dst_rgb_func != cur_state.blend.dst_rgb_func || + blend.src_a_func != cur_state.blend.src_a_func || + blend.dst_a_func != cur_state.blend.dst_a_func) { + glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func, + blend.dst_a_func); } if (blend.rgb_equation != cur_state.blend.rgb_equation || - blend.a_equation != cur_state.blend.a_equation) { + blend.a_equation != cur_state.blend.a_equation) { glBlendEquationSeparate(blend.rgb_equation, blend.a_equation); } @@ -237,8 +235,11 @@ void OpenGLState::Apply() const { GLenum OpenGLState::CheckFBStatus(GLenum target) { GLenum fb_status = glCheckFramebufferStatus(target); if (fb_status != GL_FRAMEBUFFER_COMPLETE) { - const char* fb_description = (target == GL_READ_FRAMEBUFFER ? "READ" : (target == GL_DRAW_FRAMEBUFFER ? "DRAW" : "UNK")); - LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", fb_description, fb_status); + const char* fb_description = + (target == GL_READ_FRAMEBUFFER ? "READ" + : (target == GL_DRAW_FRAMEBUFFER ? "DRAW" : "UNK")); + LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", fb_description, + fb_status); } return fb_status; diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 13c71b0a6..01dead883 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -9,14 +9,14 @@ class OpenGLState { public: struct { - bool enabled; // GL_CULL_FACE - GLenum mode; // GL_CULL_FACE_MODE + bool enabled; // GL_CULL_FACE + GLenum mode; // GL_CULL_FACE_MODE GLenum front_face; // GL_FRONT_FACE } cull; struct { - bool test_enabled; // GL_DEPTH_TEST - GLenum test_func; // GL_DEPTH_FUNC + bool test_enabled; // GL_DEPTH_TEST + GLenum test_func; // GL_DEPTH_FUNC GLboolean write_mask; // GL_DEPTH_WRITEMASK } depth; @@ -28,24 +28,24 @@ public: } color_mask; // GL_COLOR_WRITEMASK struct { - bool test_enabled; // GL_STENCIL_TEST - GLenum test_func; // GL_STENCIL_FUNC - GLint test_ref; // GL_STENCIL_REF - GLuint test_mask; // GL_STENCIL_VALUE_MASK - GLuint write_mask; // GL_STENCIL_WRITEMASK + bool test_enabled; // GL_STENCIL_TEST + GLenum test_func; // GL_STENCIL_FUNC + GLint test_ref; // GL_STENCIL_REF + GLuint test_mask; // GL_STENCIL_VALUE_MASK + GLuint write_mask; // GL_STENCIL_WRITEMASK GLenum action_stencil_fail; // GL_STENCIL_FAIL - GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL - GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS + GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL + GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS } stencil; struct { - bool enabled; // GL_BLEND + bool enabled; // GL_BLEND GLenum rgb_equation; // GL_BLEND_EQUATION_RGB - GLenum a_equation; // GL_BLEND_EQUATION_ALPHA + GLenum a_equation; // GL_BLEND_EQUATION_ALPHA GLenum src_rgb_func; // GL_BLEND_SRC_RGB GLenum dst_rgb_func; // GL_BLEND_DST_RGB - GLenum src_a_func; // GL_BLEND_SRC_ALPHA - GLenum dst_a_func; // GL_BLEND_DST_ALPHA + GLenum src_a_func; // GL_BLEND_SRC_ALPHA + GLenum dst_a_func; // GL_BLEND_DST_ALPHA struct { GLclampf red; @@ -60,7 +60,7 @@ public: // 3 texture units - one for each that is used in PICA fragment shader emulation struct { GLuint texture_2d; // GL_TEXTURE_BINDING_2D - GLuint sampler; // GL_SAMPLER_BINDING + GLuint sampler; // GL_SAMPLER_BINDING } texture_units[3]; struct { @@ -74,10 +74,10 @@ public: struct { GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING - GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING - GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING - GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING - GLuint shader_program; // GL_CURRENT_PROGRAM + GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING + GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING + GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING + GLuint shader_program; // GL_CURRENT_PROGRAM } draw; OpenGLState(); diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index d9b9c9cc2..cc49867c8 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -6,15 +6,12 @@ #include <array> #include <cstddef> - #include <glad/glad.h> - #include "common/assert.h" #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "common/logging/log.h" - #include "video_core/pica.h" using GLvec2 = std::array<GLfloat, 2>; @@ -25,8 +22,8 @@ namespace PicaToGL { inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { static const GLenum filter_mode_table[] = { - GL_NEAREST, // TextureFilter::Nearest - GL_LINEAR // TextureFilter::Linear + GL_NEAREST, // TextureFilter::Nearest + GL_LINEAR, // TextureFilter::Linear }; // Range check table for input @@ -52,10 +49,10 @@ inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) { static const GLenum wrap_mode_table[] = { - GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge - GL_CLAMP_TO_BORDER,// WrapMode::ClampToBorder - GL_REPEAT, // WrapMode::Repeat - GL_MIRRORED_REPEAT // WrapMode::MirroredRepeat + GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge + GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder + GL_REPEAT, // WrapMode::Repeat + GL_MIRRORED_REPEAT, // WrapMode::MirroredRepeat }; // Range check table for input @@ -131,22 +128,22 @@ inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) { inline GLenum LogicOp(Pica::Regs::LogicOp op) { static const GLenum logic_op_table[] = { - GL_CLEAR, // Clear - GL_AND, // And - GL_AND_REVERSE, // AndReverse - GL_COPY, // Copy - GL_SET, // Set - GL_COPY_INVERTED, // CopyInverted - GL_NOOP, // NoOp - GL_INVERT, // Invert - GL_NAND, // Nand - GL_OR, // Or - GL_NOR, // Nor - GL_XOR, // Xor - GL_EQUIV, // Equiv - GL_AND_INVERTED, // AndInverted - GL_OR_REVERSE, // OrReverse - GL_OR_INVERTED, // OrInverted + GL_CLEAR, // Clear + GL_AND, // And + GL_AND_REVERSE, // AndReverse + GL_COPY, // Copy + GL_SET, // Set + GL_COPY_INVERTED, // CopyInverted + GL_NOOP, // NoOp + GL_INVERT, // Invert + GL_NAND, // Nand + GL_OR, // Or + GL_NOR, // Nor + GL_XOR, // Xor + GL_EQUIV, // Equiv + GL_AND_INVERTED, // AndInverted + GL_OR_REVERSE, // OrReverse + GL_OR_INVERTED, // OrInverted }; // Range check table for input @@ -185,14 +182,14 @@ inline GLenum CompareFunc(Pica::Regs::CompareFunc func) { inline GLenum StencilOp(Pica::Regs::StencilAction action) { static const GLenum stencil_op_table[] = { - GL_KEEP, // StencilAction::Keep - GL_ZERO, // StencilAction::Zero - GL_REPLACE, // StencilAction::Replace - GL_INCR, // StencilAction::Increment - GL_DECR, // StencilAction::Decrement - GL_INVERT, // StencilAction::Invert - GL_INCR_WRAP, // StencilAction::IncrementWrap - GL_DECR_WRAP // StencilAction::DecrementWrap + GL_KEEP, // StencilAction::Keep + GL_ZERO, // StencilAction::Zero + GL_REPLACE, // StencilAction::Replace + GL_INCR, // StencilAction::Increment + GL_DECR, // StencilAction::Decrement + GL_INVERT, // StencilAction::Invert + GL_INCR_WRAP, // StencilAction::IncrementWrap + GL_DECR_WRAP, // StencilAction::DecrementWrap }; // Range check table for input @@ -207,18 +204,16 @@ inline GLenum StencilOp(Pica::Regs::StencilAction action) { } inline GLvec4 ColorRGBA8(const u32 color) { - return { { (color >> 0 & 0xFF) / 255.0f, - (color >> 8 & 0xFF) / 255.0f, - (color >> 16 & 0xFF) / 255.0f, - (color >> 24 & 0xFF) / 255.0f - } }; + return {{ + (color >> 0 & 0xFF) / 255.0f, (color >> 8 & 0xFF) / 255.0f, (color >> 16 & 0xFF) / 255.0f, + (color >> 24 & 0xFF) / 255.0f, + }}; } inline std::array<GLfloat, 3> LightColor(const Pica::Regs::LightColor& color) { - return { { color.r / 255.0f, - color.g / 255.0f, - color.b / 255.0f - } }; + return {{ + color.r / 255.0f, color.g / 255.0f, color.b / 255.0f, + }}; } } // namespace diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 8410e0a64..03a588364 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -6,23 +6,19 @@ #include <cstddef> #include <cstdlib> #include <memory> - #include <glad/glad.h> - #include "common/assert.h" #include "common/bit_field.h" #include "common/emu_window.h" #include "common/logging/log.h" #include "common/profiler_reporting.h" #include "common/synchronized_wrapper.h" - #include "core/hw/gpu.h" #include "core/hw/hw.h" #include "core/hw/lcd.h" #include "core/memory.h" #include "core/settings.h" #include "core/tracer/recorder.h" - #include "video_core/debug_utils/debug_utils.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/renderer_opengl.h" @@ -87,24 +83,25 @@ struct ScreenRectVertex { * by a 3x2 matrix. */ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) { - std::array<GLfloat, 3 * 2> matrix; + std::array<GLfloat, 3 * 2> matrix; // Laid out in column-major order + // clang-format off matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; // Last matrix row is implicitly assumed to be [0, 0, 1]. + // clang-format on return matrix; } /// RendererOpenGL constructor RendererOpenGL::RendererOpenGL() { - resolution_width = std::max(VideoCore::kScreenTopWidth, VideoCore::kScreenBottomWidth); + resolution_width = std::max(VideoCore::kScreenTopWidth, VideoCore::kScreenBottomWidth); resolution_height = VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight; } /// RendererOpenGL destructor -RendererOpenGL::~RendererOpenGL() { -} +RendererOpenGL::~RendererOpenGL() {} /// Swap buffers (render frame) void RendererOpenGL::SwapBuffers() { @@ -116,13 +113,15 @@ void RendererOpenGL::SwapBuffers() { const auto& framebuffer = GPU::g_regs.framebuffer_config[i]; // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 - u32 lcd_color_addr = (i == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom); + u32 lcd_color_addr = + (i == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom); lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr; LCD::Regs::ColorFill color_fill = {0}; LCD::Read(color_fill.raw, lcd_color_addr); if (color_fill.is_enabled) { - LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, screen_infos[i].texture); + LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, + screen_infos[i].texture); // Resize the texture in case the framebuffer size has changed screen_infos[i].texture.width = 1; @@ -172,15 +171,14 @@ void RendererOpenGL::SwapBuffers() { * Loads framebuffer from emulated memory into the active OpenGL texture. */ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, - ScreenInfo& screen_info) { + ScreenInfo& screen_info) { - const PAddr framebuffer_addr = framebuffer.active_fb == 0 ? - framebuffer.address_left1 : framebuffer.address_left2; + const PAddr framebuffer_addr = + framebuffer.active_fb == 0 ? framebuffer.address_left1 : framebuffer.address_left2; LOG_TRACE(Render_OpenGL, "0x%08x bytes from 0x%08x(%dx%d), fmt %x", - framebuffer.stride * framebuffer.height, - framebuffer_addr, (int)framebuffer.width, - (int)framebuffer.height, (int)framebuffer.format); + framebuffer.stride * framebuffer.height, framebuffer_addr, (int)framebuffer.width, + (int)framebuffer.height, (int)framebuffer.format); int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); size_t pixel_stride = framebuffer.stride / bpp; @@ -192,7 +190,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram // only allows rows to have a memory alignement of 4. ASSERT(pixel_stride % 4 == 0); - if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, static_cast<u32>(pixel_stride), screen_info)) { + if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, + static_cast<u32>(pixel_stride), screen_info)) { // Reset the screen info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f); @@ -208,12 +207,13 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride); // Update existing texture - // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they - // differ from the LCD resolution. + // TODO: Test what happens on hardware when you change the framebuffer dimensions so that + // they differ from the LCD resolution. // TODO: Applications could theoretically crash Citra here by specifying too large // framebuffer sizes. We should make sure that this cannot happen. glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height, - screen_info.texture.gl_format, screen_info.texture.gl_type, framebuffer_data); + screen_info.texture.gl_format, screen_info.texture.gl_type, + framebuffer_data); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); @@ -223,9 +223,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram } /** - * Fills active OpenGL texture with the given RGB color. - * Since the color is solid, the texture can be 1x1 but will stretch across whatever it's rendered on. - * This has the added benefit of being *really fast*. + * Fills active OpenGL texture with the given RGB color. Since the color is solid, the texture can + * be 1x1 but will stretch across whatever it's rendered on. */ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture) { @@ -233,7 +232,7 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color state.Apply(); glActiveTexture(GL_TEXTURE0); - u8 framebuffer_data[3] = { color_r, color_g, color_b }; + u8 framebuffer_data[3] = {color_r, color_g, color_b}; // Update existing texture glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data); @@ -246,7 +245,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color * Initializes the OpenGL state and creates persistent objects. */ void RendererOpenGL::InitOpenGLObjects() { - glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); + glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, + 0.0f); // Link shaders and get variable locations shader.Create(vertex_shader, fragment_shader); @@ -270,8 +270,10 @@ void RendererOpenGL::InitOpenGLObjects() { // Attach vertex data to VAO glBufferData(GL_ARRAY_BUFFER, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); - glVertexAttribPointer(attrib_position, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), (GLvoid*)offsetof(ScreenRectVertex, position)); - glVertexAttribPointer(attrib_tex_coord, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), (GLvoid*)offsetof(ScreenRectVertex, tex_coord)); + glVertexAttribPointer(attrib_position, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), + (GLvoid*)offsetof(ScreenRectVertex, position)); + glVertexAttribPointer(attrib_tex_coord, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), + (GLvoid*)offsetof(ScreenRectVertex, tex_coord)); glEnableVertexAttribArray(attrib_position); glEnableVertexAttribArray(attrib_tex_coord); @@ -352,23 +354,25 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, glActiveTexture(GL_TEXTURE0); glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, - texture.gl_format, texture.gl_type, nullptr); + texture.gl_format, texture.gl_type, nullptr); state.texture_units[0].texture_2d = 0; state.Apply(); } /** - * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation. + * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD + * rotation. */ -void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h) { +void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, + float w, float h) { auto& texcoords = screen_info.display_texcoords; std::array<ScreenRectVertex, 4> vertices = {{ - ScreenRectVertex(x, y, texcoords.bottom, texcoords.left), - ScreenRectVertex(x+w, y, texcoords.bottom, texcoords.right), - ScreenRectVertex(x, y+h, texcoords.top, texcoords.left), - ScreenRectVertex(x+w, y+h, texcoords.top, texcoords.right), + ScreenRectVertex(x, y, texcoords.bottom, texcoords.left), + ScreenRectVertex(x + w, y, texcoords.bottom, texcoords.right), + ScreenRectVertex(x, y + h, texcoords.top, texcoords.left), + ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.right), }}; state.texture_units[0].texture_2d = screen_info.display_texture; @@ -391,25 +395,26 @@ void RendererOpenGL::DrawScreens() { glClear(GL_COLOR_BUFFER_BIT); // Set projection matrix - std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width, - (float)layout.height); + std::array<GLfloat, 3 * 2> ortho_matrix = + MakeOrthographicMatrix((float)layout.width, (float)layout.height); glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data()); // Bind texture in Texture Unit 0 glActiveTexture(GL_TEXTURE0); glUniform1i(uniform_color_texture, 0); - DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left, (float)layout.top_screen.top, - (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight()); - DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top, - (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight()); + DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left, + (float)layout.top_screen.top, (float)layout.top_screen.GetWidth(), + (float)layout.top_screen.GetHeight()); + DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left, + (float)layout.bottom_screen.top, (float)layout.bottom_screen.GetWidth(), + (float)layout.bottom_screen.GetHeight()); m_current_frame++; } /// Updates the framerate -void RendererOpenGL::UpdateFramerate() { -} +void RendererOpenGL::UpdateFramerate() {} /** * Set the emulator window to use for renderer @@ -420,14 +425,16 @@ void RendererOpenGL::SetWindow(EmuWindow* window) { } static const char* GetSource(GLenum source) { -#define RET(s) case GL_DEBUG_SOURCE_##s: return #s +#define RET(s) \ + case GL_DEBUG_SOURCE_##s: \ + return #s switch (source) { - RET(API); - RET(WINDOW_SYSTEM); - RET(SHADER_COMPILER); - RET(THIRD_PARTY); - RET(APPLICATION); - RET(OTHER); + RET(API); + RET(WINDOW_SYSTEM); + RET(SHADER_COMPILER); + RET(THIRD_PARTY); + RET(APPLICATION); + RET(OTHER); default: UNREACHABLE(); } @@ -435,23 +442,25 @@ static const char* GetSource(GLenum source) { } static const char* GetType(GLenum type) { -#define RET(t) case GL_DEBUG_TYPE_##t: return #t +#define RET(t) \ + case GL_DEBUG_TYPE_##t: \ + return #t switch (type) { - RET(ERROR); - RET(DEPRECATED_BEHAVIOR); - RET(UNDEFINED_BEHAVIOR); - RET(PORTABILITY); - RET(PERFORMANCE); - RET(OTHER); - RET(MARKER); + RET(ERROR); + RET(DEPRECATED_BEHAVIOR); + RET(UNDEFINED_BEHAVIOR); + RET(PORTABILITY); + RET(PERFORMANCE); + RET(OTHER); + RET(MARKER); default: UNREACHABLE(); } #undef RET } -static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, - const GLchar* message, const void* user_param) { +static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, + GLsizei length, const GLchar* message, const void* user_param) { Log::Level level; switch (severity) { case GL_DEBUG_SEVERITY_HIGH: @@ -465,8 +474,8 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum level = Log::Level::Debug; break; } - LOG_GENERIC(Log::Class::Render_OpenGL, level, "%s %s %d: %s", - GetSource(source), GetType(type), id, message); + LOG_GENERIC(Log::Class::Render_OpenGL, level, "%s %s %d: %s", GetSource(source), GetType(type), + id, message); } /// Initialize the renderer @@ -493,5 +502,4 @@ bool RendererOpenGL::Init() { } /// Shutdown the renderer -void RendererOpenGL::ShutDown() { -} +void RendererOpenGL::ShutDown() {} diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 00e1044ab..87c556cff 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -5,14 +5,10 @@ #pragma once #include <array> - #include <glad/glad.h> - #include "common/common_types.h" #include "common/math_util.h" - #include "core/hw/gpu.h" - #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_state.h" @@ -38,7 +34,6 @@ struct ScreenInfo { class RendererOpenGL : public RendererBase { public: - RendererOpenGL(); ~RendererOpenGL() override; @@ -67,15 +62,14 @@ private: // Loads framebuffer from emulated memory into the display information structure void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, - ScreenInfo& screen_info); + ScreenInfo& screen_info); // Fills active OpenGL texture with the given RGB color. - void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, - const TextureInfo& texture); + void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture); - EmuWindow* render_window; ///< Handle to render window + EmuWindow* render_window; ///< Handle to render window - int resolution_width; ///< Current resolution width - int resolution_height; ///< Current resolution height + int resolution_width; ///< Current resolution width + int resolution_height; ///< Current resolution height OpenGLState state; @@ -83,10 +77,14 @@ private: OGLVertexArray vertex_array; OGLBuffer vertex_buffer; OGLShader shader; - std::array<ScreenInfo, 2> screen_infos; ///< Display information for top and bottom screens respectively + + /// Display information for top and bottom screens respectively + std::array<ScreenInfo, 2> screen_infos; + // Shader uniform location indices GLuint uniform_modelview_matrix; GLuint uniform_color_texture; + // Shader attribute input indices GLuint attrib_position; GLuint attrib_tex_coord; diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index f565e2c91..272f3ffe1 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -7,23 +7,18 @@ #include <cstring> #include <unordered_map> #include <utility> - #include <boost/range/algorithm/fill.hpp> - #include "common/bit_field.h" #include "common/hash.h" #include "common/logging/log.h" #include "common/microprofile.h" - #include "video_core/pica.h" #include "video_core/pica_state.h" #include "video_core/shader/shader.h" #include "video_core/shader/shader_interpreter.h" - #ifdef ARCHITECTURE_x86_64 #include "video_core/shader/shader_jit_x64.h" #endif // ARCHITECTURE_x86_64 - #include "video_core/video_core.h" namespace Pica { @@ -46,10 +41,8 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) { const auto& output_register_map = g_state.regs.vs_output_attributes[index]; - u32 semantics[4] = { - output_register_map.map_x, output_register_map.map_y, - output_register_map.map_z, output_register_map.map_w - }; + u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, + output_register_map.map_z, output_register_map.map_w}; for (unsigned comp = 0; comp < 4; ++comp) { float24* out = ((float24*)&ret) + semantics[comp]; @@ -65,19 +58,20 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) { index++; } - // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation + // The hardware takes the absolute and saturates vertex colors like this, *before* doing + // interpolation for (unsigned i = 0; i < 4; ++i) { - ret.color[i] = float24::FromFloat32( - std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); + ret.color[i] = float24::FromFloat32(std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); } LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " - "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", - ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), - ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), - ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), - ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), - ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); + "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", + ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), + ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), + ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(), + ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), + ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), ret.view.x.ToFloat32(), + ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); return ret; } @@ -96,8 +90,9 @@ void ClearCache() { void ShaderSetup::Setup() { #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { - u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ - Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data))); + u64 cache_key = + Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ + Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)); auto iter = shader_map.find(cache_key); if (iter != shader_map.end()) { @@ -127,7 +122,7 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num const auto& attribute_register_map = config.input_register_map; for (unsigned i = 0; i < num_attributes; i++) - state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; + state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; state.conditional_code[0] = false; state.conditional_code[1] = false; @@ -140,10 +135,11 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num #else RunInterpreter(setup, state, config.main_offset); #endif // ARCHITECTURE_x86_64 - } -DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { +DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, + const Regs::ShaderConfig& config, + const ShaderSetup& setup) { UnitState<true> state; state.debug.max_offset = 0; @@ -155,7 +151,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_ boost::fill(state.registers.input, &dummy_register); for (unsigned i = 0; i < num_attributes; i++) - state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; + state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; state.conditional_code[0] = false; state.conditional_code[1] = false; diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index fee16df62..8858d67f8 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -9,16 +9,12 @@ #include <memory> #include <type_traits> #include <vector> - #include <boost/container/static_vector.hpp> - #include <nihstro/shader_bytecode.h> - #include "common/assert.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "common/vector_math.h" - #include "video_core/pica.h" #include "video_core/pica_types.h" @@ -94,46 +90,46 @@ struct OutputRegisters { static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD"); // Helper structure used to keep track of data useful for inspection of shader emulation -template<bool full_debugging> +template <bool full_debugging> struct DebugData; -template<> +template <> struct DebugData<false> { // TODO: Hide these behind and interface and move them to DebugData<true> - u32 max_offset; // maximum program counter ever reached + u32 max_offset; // maximum program counter ever reached u32 max_opdesc_id; // maximum swizzle pattern index ever used }; -template<> +template <> struct DebugData<true> { // Records store the input and output operands of a particular instruction. struct Record { enum Type { // Floating point arithmetic operands - SRC1 = 0x1, - SRC2 = 0x2, - SRC3 = 0x4, + SRC1 = 0x1, + SRC2 = 0x2, + SRC3 = 0x4, // Initial and final output operand value - DEST_IN = 0x8, - DEST_OUT = 0x10, + DEST_IN = 0x8, + DEST_OUT = 0x10, // Current and next instruction offset (in words) - CUR_INSTR = 0x20, - NEXT_INSTR = 0x40, + CUR_INSTR = 0x20, + NEXT_INSTR = 0x40, // Output address register value ADDR_REG_OUT = 0x80, // Result of a comparison instruction - CMP_RESULT = 0x100, + CMP_RESULT = 0x100, // Input values for conditional flow control instructions COND_BOOL_IN = 0x200, - COND_CMP_IN = 0x400, + COND_CMP_IN = 0x400, // Input values for a loop - LOOP_INT_IN = 0x800, + LOOP_INT_IN = 0x800, }; Math::Vec4<float24> src1; @@ -156,7 +152,7 @@ struct DebugData<true> { unsigned mask = 0; }; - u32 max_offset; // maximum program counter ever reached + u32 max_offset; // maximum program counter ever reached u32 max_opdesc_id; // maximum swizzle pattern index ever used // List of records for each executed shader instruction @@ -167,10 +163,10 @@ struct DebugData<true> { using DebugDataRecord = DebugData<true>::Record; // Helper function to set a DebugData<true>::Record field based on the template enum parameter. -template<DebugDataRecord::Type type, typename ValueType> +template <DebugDataRecord::Type type, typename ValueType> inline void SetField(DebugDataRecord& record, ValueType value); -template<> +template <> inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { record.src1.x = value[0]; record.src1.y = value[1]; @@ -178,7 +174,7 @@ inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* va record.src1.w = value[3]; } -template<> +template <> inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { record.src2.x = value[0]; record.src2.y = value[1]; @@ -186,7 +182,7 @@ inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* va record.src2.w = value[3]; } -template<> +template <> inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { record.src3.x = value[0]; record.src3.y = value[1]; @@ -194,7 +190,7 @@ inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* va record.src3.w = value[3]; } -template<> +template <> inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { record.dest_in.x = value[0]; record.dest_in.y = value[1]; @@ -202,7 +198,7 @@ inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* record.dest_in.w = value[3]; } -template<> +template <> inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { record.dest_out.x = value[0]; record.dest_out.y = value[1]; @@ -210,67 +206,66 @@ inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24 record.dest_out.w = value[3]; } -template<> +template <> inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { record.address_registers[0] = value[0]; record.address_registers[1] = value[1]; } -template<> +template <> inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { record.conditional_code[0] = value[0]; record.conditional_code[1] = value[1]; } -template<> +template <> inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { record.cond_bool = value; } -template<> +template <> inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { record.cond_cmp[0] = value[0]; record.cond_cmp[1] = value[1]; } -template<> +template <> inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { record.loop_int = value; } -template<> +template <> inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { record.instruction_offset = value; } -template<> +template <> inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { record.next_instruction = value; } // Helper function to set debug information on the current shader iteration. -template<DebugDataRecord::Type type, typename ValueType> +template <DebugDataRecord::Type type, typename ValueType> inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { // Debugging disabled => nothing to do } -template<DebugDataRecord::Type type, typename ValueType> +template <DebugDataRecord::Type type, typename ValueType> inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { if (offset >= debug_data.records.size()) debug_data.records.resize(offset + 1); - SetField<type, ValueType>(debug_data.records[offset], value); - debug_data.records[offset].mask |= type; + SetField<type, ValueType>(debug_data.records[offset], value); + debug_data.records[offset].mask |= type; } - /** * This structure contains the state information that needs to be unique for a shader unit. The 3DS * has four shader units that process shaders in parallel. At the present, Citra only implements a * single shader unit that processes all shaders serially. Putting the state information in a struct * here will make it easier for us to parallelize the shader processing later. */ -template<bool Debug> +template <bool Debug> struct UnitState { struct Registers { // The registers are accessed by the shader JIT using SSE instructions, and are therefore @@ -293,10 +288,12 @@ struct UnitState { static size_t InputOffset(const SourceRegister& reg) { switch (reg.GetRegisterType()) { case RegisterType::Input: - return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return offsetof(UnitState, registers.input) + + reg.GetIndex() * sizeof(Math::Vec4<float24>); case RegisterType::Temporary: - return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return offsetof(UnitState, registers.temporary) + + reg.GetIndex() * sizeof(Math::Vec4<float24>); default: UNREACHABLE(); @@ -307,10 +304,12 @@ struct UnitState { static size_t OutputOffset(const DestRegister& reg) { switch (reg.GetRegisterType()) { case RegisterType::Output: - return offsetof(UnitState, output_registers.value) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return offsetof(UnitState, output_registers.value) + + reg.GetIndex() * sizeof(Math::Vec4<float24>); case RegisterType::Temporary: - return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return offsetof(UnitState, registers.temporary) + + reg.GetIndex() * sizeof(Math::Vec4<float24>); default: UNREACHABLE(); @@ -336,13 +335,13 @@ struct ShaderSetup { static size_t UniformOffset(RegisterType type, unsigned index) { switch (type) { case RegisterType::FloatUniform: - return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>); + return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>); case RegisterType::BoolUniform: - return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool); + return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool); case RegisterType::IntUniform: - return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>); + return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>); default: UNREACHABLE(); @@ -354,8 +353,8 @@ struct ShaderSetup { std::array<u32, 1024> swizzle_data; /** - * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per - * vertex, which would happen within the `Run` function). + * Performs any shader unit setup that only needs to happen once per shader (as opposed to once + * per vertex, which would happen within the `Run` function). */ void Setup(); @@ -375,8 +374,8 @@ struct ShaderSetup { * @param setup Setup object for the shader pipeline * @return Debug information for this shader with regards to the given vertex */ - DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); - + DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, + const Regs::ShaderConfig& config, const ShaderSetup& setup); }; } // namespace Shader diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index f6c86a759..501d00b6b 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -6,14 +6,11 @@ #include <array> #include <cmath> #include <numeric> - #include <nihstro/shader_bytecode.h> - #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" #include "common/vector_math.h" - #include "video_core/pica_state.h" #include "video_core/pica_types.h" #include "video_core/shader/shader.h" @@ -40,7 +37,7 @@ struct CallStackElement { u32 loop_address; // The address where we'll return to after each loop iteration }; -template<bool Debug> +template <bool Debug> void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) { // TODO: Is there a maximal size for this? boost::container::static_vector<CallStackElement, 16> call_stack; @@ -74,14 +71,17 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned } } - const Instruction instr = { program_code[program_counter] }; - const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; + const Instruction instr = {program_code[program_counter]}; + const SwizzlePattern swizzle = {swizzle_data[instr.common.operand_desc_id]}; - auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions, - u32 return_offset, u8 repeat_count, u8 loop_increment) { - program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset + auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, + u32 num_instructions, u32 return_offset, + u8 repeat_count, u8 loop_increment) { + // -1 to make sure when incrementing the PC we end up at the correct offset + program_counter = offset - 1; ASSERT(call_stack.size() < call_stack.capacity()); - call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); + call_stack.push_back( + {offset + num_instructions, return_offset, repeat_count, loop_increment, offset}); }; Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter); if (iteration > 0) @@ -106,24 +106,26 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned }; switch (instr.opcode.Value().GetInfo().type) { - case OpCode::Type::Arithmetic: - { - const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); + case OpCode::Type::Arithmetic: { + const bool is_inverted = + (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); - const int address_offset = (instr.common.address_register_index == 0) - ? 0 : state.address_registers[instr.common.address_register_index - 1]; + const int address_offset = + (instr.common.address_register_index == 0) + ? 0 + : state.address_registers[instr.common.address_register_index - 1]; - const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset)); - const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset)); + const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + + (!is_inverted * address_offset)); + const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + + (is_inverted * address_offset)); const bool negate_src1 = ((bool)swizzle.negate_src1 != false); const bool negate_src2 = ((bool)swizzle.negate_src2 != false); float24 src1[4] = { - src1_[(int)swizzle.GetSelectorSrc1(0)], - src1_[(int)swizzle.GetSelectorSrc1(1)], - src1_[(int)swizzle.GetSelectorSrc1(2)], - src1_[(int)swizzle.GetSelectorSrc1(3)], + src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)], + src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)], }; if (negate_src1) { src1[0] = src1[0] * float24::FromFloat32(-1); @@ -132,10 +134,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned src1[3] = src1[3] * float24::FromFloat32(-1); } float24 src2[4] = { - src2_[(int)swizzle.GetSelectorSrc2(0)], - src2_[(int)swizzle.GetSelectorSrc2(1)], - src2_[(int)swizzle.GetSelectorSrc2(2)], - src2_[(int)swizzle.GetSelectorSrc2(3)], + src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)], + src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)], }; if (negate_src2) { src2[0] = src2[0] * float24::FromFloat32(-1); @@ -144,15 +144,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned src2[3] = src2[3] * float24::FromFloat32(-1); } - float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] - : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] - : dummy_vec4_float24; + float24* dest = + (instr.common.dest.Value() < 0x10) + ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] + : (instr.common.dest.Value() < 0x20) + ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] + : dummy_vec4_float24; - state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); + state.debug.max_opdesc_id = + std::max<u32>(state.debug.max_opdesc_id, 1 + instr.common.operand_desc_id); switch (instr.opcode.Value().EffectiveOpCode()) { - case OpCode::Id::ADD: - { + case OpCode::Id::ADD: { Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); @@ -166,8 +169,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned break; } - case OpCode::Id::MUL: - { + case OpCode::Id::MUL: { Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); @@ -228,8 +230,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned case OpCode::Id::DP3: case OpCode::Id::DP4: case OpCode::Id::DPH: - case OpCode::Id::DPHI: - { + case OpCode::Id::DPHI: { Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); @@ -239,7 +240,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned src1[3] = float24::FromFloat32(1.0f); int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4; - float24 dot = std::inner_product(src1, src1 + num_components, src2, float24::FromFloat32(0.f)); + float24 dot = std::inner_product(src1, src1 + num_components, src2, + float24::FromFloat32(0.f)); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -252,8 +254,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned } // Reciprocal - case OpCode::Id::RCP: - { + case OpCode::Id::RCP: { Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32()); @@ -268,8 +269,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned } // Reciprocal Square Root - case OpCode::Id::RSQ: - { + case OpCode::Id::RSQ: { Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); @@ -283,8 +283,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned break; } - case OpCode::Id::MOVA: - { + case OpCode::Id::MOVA: { Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); for (int i = 0; i < 2; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -293,12 +292,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned // TODO: Figure out how the rounding is done on hardware state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); } - Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers); + Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, + state.address_registers); break; } - case OpCode::Id::MOV: - { + case OpCode::Id::MOV: { Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); for (int i = 0; i < 4; ++i) { @@ -320,7 +319,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned if (!swizzle.DestComponentEnabled(i)) continue; - dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); + dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) + : float24::FromFloat32(0.0f); } Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); break; @@ -334,7 +334,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned if (!swizzle.DestComponentEnabled(i)) continue; - dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); + dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) + : float24::FromFloat32(0.0f); } Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); break; @@ -349,40 +350,39 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); switch (op) { - case Instruction::Common::CompareOpType::Equal: - state.conditional_code[i] = (src1[i] == src2[i]); - break; + case Instruction::Common::CompareOpType::Equal: + state.conditional_code[i] = (src1[i] == src2[i]); + break; - case Instruction::Common::CompareOpType::NotEqual: - state.conditional_code[i] = (src1[i] != src2[i]); - break; + case Instruction::Common::CompareOpType::NotEqual: + state.conditional_code[i] = (src1[i] != src2[i]); + break; - case Instruction::Common::CompareOpType::LessThan: - state.conditional_code[i] = (src1[i] < src2[i]); - break; + case Instruction::Common::CompareOpType::LessThan: + state.conditional_code[i] = (src1[i] < src2[i]); + break; - case Instruction::Common::CompareOpType::LessEqual: - state.conditional_code[i] = (src1[i] <= src2[i]); - break; + case Instruction::Common::CompareOpType::LessEqual: + state.conditional_code[i] = (src1[i] <= src2[i]); + break; - case Instruction::Common::CompareOpType::GreaterThan: - state.conditional_code[i] = (src1[i] > src2[i]); - break; + case Instruction::Common::CompareOpType::GreaterThan: + state.conditional_code[i] = (src1[i] > src2[i]); + break; - case Instruction::Common::CompareOpType::GreaterEqual: - state.conditional_code[i] = (src1[i] >= src2[i]); - break; + case Instruction::Common::CompareOpType::GreaterEqual: + state.conditional_code[i] = (src1[i] >= src2[i]); + break; - default: - LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op)); - break; + default: + LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op)); + break; } } Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); break; - case OpCode::Id::EX2: - { + case OpCode::Id::EX2: { Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); @@ -399,8 +399,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned break; } - case OpCode::Id::LG2: - { + case OpCode::Id::LG2: { Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); @@ -419,7 +418,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned default: LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); + (int)instr.opcode.Value().EffectiveOpCode(), + instr.opcode.Value().GetInfo().name, instr.hex); DEBUG_ASSERT(false); break; } @@ -427,30 +427,32 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned break; } - case OpCode::Type::MultiplyAdd: - { + case OpCode::Type::MultiplyAdd: { if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) || (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) { - const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(&swizzle_data[instr.mad.operand_desc_id]); + const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>( + &swizzle_data[instr.mad.operand_desc_id]); bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); - const int address_offset = (instr.mad.address_register_index == 0) - ? 0 : state.address_registers[instr.mad.address_register_index - 1]; + const int address_offset = + (instr.mad.address_register_index == 0) + ? 0 + : state.address_registers[instr.mad.address_register_index - 1]; const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); - const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + (!is_inverted * address_offset)); - const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + ( is_inverted * address_offset)); + const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + + (!is_inverted * address_offset)); + const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + + (is_inverted * address_offset)); const bool negate_src1 = ((bool)swizzle.negate_src1 != false); const bool negate_src2 = ((bool)swizzle.negate_src2 != false); const bool negate_src3 = ((bool)swizzle.negate_src3 != false); float24 src1[4] = { - src1_[(int)swizzle.GetSelectorSrc1(0)], - src1_[(int)swizzle.GetSelectorSrc1(1)], - src1_[(int)swizzle.GetSelectorSrc1(2)], - src1_[(int)swizzle.GetSelectorSrc1(3)], + src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)], + src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)], }; if (negate_src1) { src1[0] = src1[0] * float24::FromFloat32(-1); @@ -459,10 +461,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned src1[3] = src1[3] * float24::FromFloat32(-1); } float24 src2[4] = { - src2_[(int)swizzle.GetSelectorSrc2(0)], - src2_[(int)swizzle.GetSelectorSrc2(1)], - src2_[(int)swizzle.GetSelectorSrc2(2)], - src2_[(int)swizzle.GetSelectorSrc2(3)], + src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)], + src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)], }; if (negate_src2) { src2[0] = src2[0] * float24::FromFloat32(-1); @@ -471,10 +471,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned src2[3] = src2[3] * float24::FromFloat32(-1); } float24 src3[4] = { - src3_[(int)swizzle.GetSelectorSrc3(0)], - src3_[(int)swizzle.GetSelectorSrc3(1)], - src3_[(int)swizzle.GetSelectorSrc3(2)], - src3_[(int)swizzle.GetSelectorSrc3(3)], + src3_[(int)swizzle.GetSelectorSrc3(0)], src3_[(int)swizzle.GetSelectorSrc3(1)], + src3_[(int)swizzle.GetSelectorSrc3(2)], src3_[(int)swizzle.GetSelectorSrc3(3)], }; if (negate_src3) { src3[0] = src3[0] * float24::FromFloat32(-1); @@ -483,9 +481,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned src3[3] = src3[3] * float24::FromFloat32(-1); } - float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] - : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] - : dummy_vec4_float24; + float24* dest = + (instr.mad.dest.Value() < 0x10) + ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] + : (instr.mad.dest.Value() < 0x20) + ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] + : dummy_vec4_float24; Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); @@ -500,16 +501,17 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); } else { LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); + (int)instr.opcode.Value().EffectiveOpCode(), + instr.opcode.Value().GetInfo().name, instr.hex); } break; } - default: - { - static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { - bool results[2] = { refx == state.conditional_code[0], - refy == state.conditional_code[1] }; + default: { + static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, + Instruction::FlowControlType flow_control) { + bool results[2] = {refx == state.conditional_code[0], + refy == state.conditional_code[1]}; switch (flow_control.op) { case flow_control.Or: @@ -533,44 +535,45 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned break; case OpCode::Id::JMPC: - Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); - if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { + Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, + state.conditional_code); + if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, + instr.flow_control)) { program_counter = instr.flow_control.dest_offset - 1; } break; case OpCode::Id::JMPU: - Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); + Record<DebugDataRecord::COND_BOOL_IN>( + state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); - if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { + if (uniforms.b[instr.flow_control.bool_uniform_id] == + !(instr.flow_control.num_instructions & 1)) { program_counter = instr.flow_control.dest_offset - 1; } break; case OpCode::Id::CALL: - call(state, - instr.flow_control.dest_offset, - instr.flow_control.num_instructions, + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, program_counter + 1, 0, 0); break; case OpCode::Id::CALLU: - Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); + Record<DebugDataRecord::COND_BOOL_IN>( + state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); if (uniforms.b[instr.flow_control.bool_uniform_id]) { - call(state, - instr.flow_control.dest_offset, - instr.flow_control.num_instructions, - program_counter + 1, 0, 0); + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, + program_counter + 1, 0, 0); } break; case OpCode::Id::CALLC: - Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); - if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { - call(state, - instr.flow_control.dest_offset, - instr.flow_control.num_instructions, - program_counter + 1, 0, 0); + Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, + state.conditional_code); + if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, + instr.flow_control)) { + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, + program_counter + 1, 0, 0); } break; @@ -578,43 +581,42 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned break; case OpCode::Id::IFU: - Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); + Record<DebugDataRecord::COND_BOOL_IN>( + state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); if (uniforms.b[instr.flow_control.bool_uniform_id]) { - call(state, - program_counter + 1, + call(state, program_counter + 1, instr.flow_control.dest_offset - program_counter - 1, - instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); + instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, + 0); } else { - call(state, - instr.flow_control.dest_offset, - instr.flow_control.num_instructions, - instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, + instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, + 0); } break; - case OpCode::Id::IFC: - { + case OpCode::Id::IFC: { // TODO: Do we need to consider swizzlers here? - Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); - if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { - call(state, - program_counter + 1, + Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, + state.conditional_code); + if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, + instr.flow_control)) { + call(state, program_counter + 1, instr.flow_control.dest_offset - program_counter - 1, - instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); + instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, + 0); } else { - call(state, - instr.flow_control.dest_offset, - instr.flow_control.num_instructions, - instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, + instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, + 0); } break; } - case OpCode::Id::LOOP: - { + case OpCode::Id::LOOP: { Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x, uniforms.i[instr.flow_control.int_uniform_id].y, uniforms.i[instr.flow_control.int_uniform_id].z, @@ -622,18 +624,16 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned state.address_registers[2] = loop_param.y; Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); - call(state, - program_counter + 1, + call(state, program_counter + 1, instr.flow_control.dest_offset - program_counter + 1, - instr.flow_control.dest_offset + 1, - loop_param.x, - loop_param.z); + instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z); break; } default: LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); + (int)instr.opcode.Value().EffectiveOpCode(), + instr.opcode.Value().GetInfo().name, instr.hex); break; } diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index bb3ce1c6e..48ede0a2e 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -8,9 +8,10 @@ namespace Pica { namespace Shader { -template <bool Debug> struct UnitState; +template <bool Debug> +struct UnitState; -template<bool Debug> +template <bool Debug> void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset); } // namespace diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 43e7e6b4c..211c703ab 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -5,20 +5,16 @@ #include <algorithm> #include <cmath> #include <cstdint> -#include <xmmintrin.h> - #include <nihstro/shader_bytecode.h> - +#include <xmmintrin.h> #include "common/assert.h" #include "common/logging/log.h" #include "common/vector_math.h" #include "common/x64/abi.h" #include "common/x64/cpu_detect.h" #include "common/x64/emitter.h" - #include "shader.h" #include "shader_jit_x64.h" - #include "video_core/pica_state.h" #include "video_core/pica_types.h" @@ -31,70 +27,70 @@ using namespace Gen; typedef void (JitShader::*JitFunction)(Instruction instr); const JitFunction instr_table[64] = { - &JitShader::Compile_ADD, // add - &JitShader::Compile_DP3, // dp3 - &JitShader::Compile_DP4, // dp4 - &JitShader::Compile_DPH, // dph - nullptr, // unknown - &JitShader::Compile_EX2, // ex2 - &JitShader::Compile_LG2, // lg2 - nullptr, // unknown - &JitShader::Compile_MUL, // mul - &JitShader::Compile_SGE, // sge - &JitShader::Compile_SLT, // slt - &JitShader::Compile_FLR, // flr - &JitShader::Compile_MAX, // max - &JitShader::Compile_MIN, // min - &JitShader::Compile_RCP, // rcp - &JitShader::Compile_RSQ, // rsq - nullptr, // unknown - nullptr, // unknown - &JitShader::Compile_MOVA, // mova - &JitShader::Compile_MOV, // mov - nullptr, // unknown - nullptr, // unknown - nullptr, // unknown - nullptr, // unknown - &JitShader::Compile_DPH, // dphi - nullptr, // unknown - &JitShader::Compile_SGE, // sgei - &JitShader::Compile_SLT, // slti - nullptr, // unknown - nullptr, // unknown - nullptr, // unknown - nullptr, // unknown - nullptr, // unknown - &JitShader::Compile_NOP, // nop - &JitShader::Compile_END, // end - nullptr, // break - &JitShader::Compile_CALL, // call - &JitShader::Compile_CALLC, // callc - &JitShader::Compile_CALLU, // callu - &JitShader::Compile_IF, // ifu - &JitShader::Compile_IF, // ifc - &JitShader::Compile_LOOP, // loop - nullptr, // emit - nullptr, // sete - &JitShader::Compile_JMP, // jmpc - &JitShader::Compile_JMP, // jmpu - &JitShader::Compile_CMP, // cmp - &JitShader::Compile_CMP, // cmp - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // mad - &JitShader::Compile_MAD, // mad - &JitShader::Compile_MAD, // mad - &JitShader::Compile_MAD, // mad - &JitShader::Compile_MAD, // mad - &JitShader::Compile_MAD, // mad - &JitShader::Compile_MAD, // mad - &JitShader::Compile_MAD, // mad + &JitShader::Compile_ADD, // add + &JitShader::Compile_DP3, // dp3 + &JitShader::Compile_DP4, // dp4 + &JitShader::Compile_DPH, // dph + nullptr, // unknown + &JitShader::Compile_EX2, // ex2 + &JitShader::Compile_LG2, // lg2 + nullptr, // unknown + &JitShader::Compile_MUL, // mul + &JitShader::Compile_SGE, // sge + &JitShader::Compile_SLT, // slt + &JitShader::Compile_FLR, // flr + &JitShader::Compile_MAX, // max + &JitShader::Compile_MIN, // min + &JitShader::Compile_RCP, // rcp + &JitShader::Compile_RSQ, // rsq + nullptr, // unknown + nullptr, // unknown + &JitShader::Compile_MOVA, // mova + &JitShader::Compile_MOV, // mov + nullptr, // unknown + nullptr, // unknown + nullptr, // unknown + nullptr, // unknown + &JitShader::Compile_DPH, // dphi + nullptr, // unknown + &JitShader::Compile_SGE, // sgei + &JitShader::Compile_SLT, // slti + nullptr, // unknown + nullptr, // unknown + nullptr, // unknown + nullptr, // unknown + nullptr, // unknown + &JitShader::Compile_NOP, // nop + &JitShader::Compile_END, // end + nullptr, // break + &JitShader::Compile_CALL, // call + &JitShader::Compile_CALLC, // callc + &JitShader::Compile_CALLU, // callu + &JitShader::Compile_IF, // ifu + &JitShader::Compile_IF, // ifc + &JitShader::Compile_LOOP, // loop + nullptr, // emit + nullptr, // sete + &JitShader::Compile_JMP, // jmpc + &JitShader::Compile_JMP, // jmpu + &JitShader::Compile_CMP, // cmp + &JitShader::Compile_CMP, // cmp + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad }; // The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can @@ -136,9 +132,9 @@ static const X64Reg NEGBIT = XMM15; // State registers that must not be modified by external functions calls // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed static const BitSet32 persistent_regs = { - SETUP, STATE, // Pointers to register blocks + SETUP, STATE, // Pointers to register blocks ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers - ONE+16, NEGBIT+16, // Constants + ONE + 16, NEGBIT + 16, // Constants }; /// Raw constant for the source register selector that indicates no swizzling is performed @@ -152,7 +148,7 @@ static const u8 NO_DEST_REG_MASK = 0xf; * @return Instruction at the specified offset */ static Instruction GetVertexShaderInstruction(size_t offset) { - return { g_state.vs.program_code[offset] }; + return {g_state.vs.program_code[offset]}; } static void LogCritical(const char* msg) { @@ -172,7 +168,8 @@ void JitShader::Compile_Assert(bool condition, const char* msg) { * @param src_reg SourceRegister object corresponding to the source register to load * @param dest Destination XMM register to store the loaded, swizzled source register */ -void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { +void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, + X64Reg dest) { X64Reg src_ptr; size_t src_offset; @@ -189,7 +186,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe unsigned operand_desc_id; - const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); + const bool is_inverted = + (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); unsigned address_register_index; unsigned offset_src; @@ -225,7 +223,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe MOVAPS(dest, MDisp(src_ptr, src_offset_disp)); } - SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] }; + SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]}; // Generate instructions for source register swizzling as needed u8 sel = swiz.GetRawSelector(src_num); @@ -238,13 +236,13 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe } // If the source register should be negated, flip the negative bit using XOR - const bool negate[] = { swiz.negate_src1, swiz.negate_src2, swiz.negate_src3 }; + const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3}; if (negate[src_num - 1]) { XORPS(dest, R(NEGBIT)); } } -void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { +void JitShader::Compile_DestEnable(Instruction instr, X64Reg src) { DestRegister dest; unsigned operand_desc_id; if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || @@ -256,10 +254,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { dest = instr.common.dest.Value(); } - SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] }; + SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]}; int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest); - ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), "Destinaton offset too large for int type"); + ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), + "Destinaton offset too large for int type"); // If all components are enabled, write the result to the destination register if (swiz.dest_mask == NO_DEST_REG_MASK) { @@ -267,18 +266,21 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { MOVAPS(MDisp(STATE, dest_offset_disp), src); } else { - // Not all components are enabled, so mask the result when storing to the destination register... + // Not all components are enabled, so mask the result when storing to the destination + // register... MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp)); if (Common::GetCPUCaps().sse4_1) { - u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); + u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | + ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); BLENDPS(SCRATCH, R(src), mask); } else { MOVAPS(SCRATCH2, R(src)); UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination - UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination + UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination - // Compute selector to selectively copy source components to destination for SHUFPS instruction + // Compute selector to selectively copy source components to destination for SHUFPS + // instruction u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) | ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) | ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) | @@ -336,7 +338,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { } void JitShader::Compile_UniformCondition(Instruction instr) { - int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); + int offset = + ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); } @@ -512,7 +515,7 @@ void JitShader::Compile_MIN(Instruction instr) { } void JitShader::Compile_MOVA(Instruction instr) { - SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] }; + SwizzlePattern swiz = {g_state.vs.swizzle_data[instr.common.operand_desc_id]}; if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { return; // NoOp @@ -583,8 +586,7 @@ void JitShader::Compile_RSQ(Instruction instr) { Compile_DestEnable(instr, SRC1); } -void JitShader::Compile_NOP(Instruction instr) { -} +void JitShader::Compile_NOP(Instruction instr) {} void JitShader::Compile_END(Instruction instr) { ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); @@ -597,7 +599,7 @@ void JitShader::Compile_CALL(Instruction instr) { // Call the subroutine FixupBranch b = CALL(); - fixup_branches.push_back({ b, instr.flow_control.dest_offset }); + fixup_branches.push_back({b, instr.flow_control.dest_offset}); // Skip over the return offset that's on the stack ADD(64, R(RSP), Imm32(8)); @@ -628,7 +630,7 @@ void JitShader::Compile_CMP(Instruction instr) { // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here // because they don't match when used with NaNs. - static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE }; + static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE}; bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual); Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1; @@ -678,7 +680,8 @@ void JitShader::Compile_MAD(Instruction instr) { } void JitShader::Compile_IF(Instruction instr) { - Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported"); + Compile_Assert(instr.flow_control.dest_offset >= program_counter, + "Backwards if-statements not supported"); // Evaluate the "IF" condition if (instr.opcode.Value() == OpCode::Id::IFU) { @@ -709,29 +712,31 @@ void JitShader::Compile_IF(Instruction instr) { } void JitShader::Compile_LOOP(Instruction instr) { - Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported"); + Compile_Assert(instr.flow_control.dest_offset >= program_counter, + "Backwards loops not supported"); Compile_Assert(!looping, "Nested loops not supported"); looping = true; - int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); + int offset = + ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); SHR(32, R(LOOPCOUNT_REG), Imm8(8)); AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start MOV(32, R(LOOPINC), R(LOOPCOUNT)); SHR(32, R(LOOPINC), Imm8(16)); - MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer + MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count - ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1 + ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1 auto loop_start = GetCodePtr(); Compile_Block(instr.flow_control.dest_offset + 1); ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component - SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1 - J_CC(CC_NZ, loop_start); // Loop if not equal + SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1 + J_CC(CC_NZ, loop_start); // Loop if not equal looping = false; } @@ -744,11 +749,11 @@ void JitShader::Compile_JMP(Instruction instr) { else UNREACHABLE(); - bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && - (instr.flow_control.num_instructions & 1); + bool inverted_condition = + (instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1); FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); - fixup_branches.push_back({ b, instr.flow_control.dest_offset }); + fixup_branches.push_back({b, instr.flow_control.dest_offset}); } void JitShader::Compile_Block(unsigned end) { @@ -773,7 +778,8 @@ void JitShader::Compile_NextInstr() { Compile_Return(); } - ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!"); + ASSERT_MSG(code_ptr[program_counter] == nullptr, + "Tried to compile already compiled shader location!"); code_ptr[program_counter] = GetCodePtr(); Instruction instr = GetVertexShaderInstruction(program_counter++); @@ -787,7 +793,7 @@ void JitShader::Compile_NextInstr() { } else { // Unhandled instruction LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", - instr.opcode.Value().EffectiveOpCode(), instr.hex); + instr.opcode.Value().EffectiveOpCode(), instr.hex); } } @@ -801,7 +807,8 @@ void JitShader::FindReturnOffsets() { case OpCode::Id::CALL: case OpCode::Id::CALLC: case OpCode::Id::CALLU: - return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions); + return_offsets.push_back(instr.flow_control.dest_offset + + instr.flow_control.num_instructions); break; default: break; @@ -835,12 +842,12 @@ void JitShader::Compile() { XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG)); // Used to set a register to one - static const __m128 one = { 1.f, 1.f, 1.f, 1.f }; + static const __m128 one = {1.f, 1.f, 1.f, 1.f}; MOV(PTRBITS, R(RAX), ImmPtr(&one)); MOVAPS(ONE, MatR(RAX)); // Used to negate registers - static const __m128 neg = { -0.f, -0.f, -0.f, -0.f }; + static const __m128 neg = {-0.f, -0.f, -0.f, -0.f}; MOV(PTRBITS, R(RAX), ImmPtr(&neg)); MOVAPS(NEGBIT, MatR(RAX)); @@ -850,7 +857,8 @@ void JitShader::Compile() { // Compile entire program Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); - // Set the target for any incomplete branches now that the entire shader program has been emitted + // Set the target for any incomplete branches now that the entire shader program has been + // emitted for (const auto& branch : fixup_branches) { SetJumpTarget(branch.first, code_ptr[branch.second]); } @@ -861,7 +869,8 @@ void JitShader::Compile() { fixup_branches.clear(); fixup_branches.shrink_to_fit(); - uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); + uintptr_t size = + reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size); diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 5468459d4..98de5ecef 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -8,13 +8,10 @@ #include <cstddef> #include <utility> #include <vector> - #include <nihstro/shader_bytecode.h> - #include "common/bit_set.h" #include "common/common_types.h" #include "common/x64/emitter.h" - #include "video_core/shader/shader.h" using nihstro::Instruction; @@ -70,11 +67,11 @@ public: void Compile_MAD(Instruction instr); private: - void Compile_Block(unsigned end); void Compile_NextInstr(); - void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); + void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, + Gen::X64Reg dest); void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); /** @@ -111,8 +108,8 @@ private: /// Offsets in code where a return needs to be inserted std::vector<unsigned> return_offsets; - unsigned program_counter = 0; ///< Offset of the next instruction to decode - bool looping = false; ///< True if compiling a loop, used to check for nested loops + unsigned program_counter = 0; ///< Offset of the next instruction to decode + bool looping = false; ///< True if compiling a loop, used to check for nested loops /// Branches that need to be fixed up once the entire shader program is compiled std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; diff --git a/src/video_core/swrasterizer.cpp b/src/video_core/swrasterizer.cpp index 03df15b01..9cd21f72b 100644 --- a/src/video_core/swrasterizer.cpp +++ b/src/video_core/swrasterizer.cpp @@ -8,9 +8,8 @@ namespace VideoCore { void SWRasterizer::AddTriangle(const Pica::Shader::OutputVertex& v0, - const Pica::Shader::OutputVertex& v1, - const Pica::Shader::OutputVertex& v2) { + const Pica::Shader::OutputVertex& v1, + const Pica::Shader::OutputVertex& v2) { Pica::Clipper::ProcessTriangle(v0, v1, v2); } - } diff --git a/src/video_core/swrasterizer.h b/src/video_core/swrasterizer.h index 0a028b774..6d42d7409 100644 --- a/src/video_core/swrasterizer.h +++ b/src/video_core/swrasterizer.h @@ -5,7 +5,6 @@ #pragma once #include "common/common_types.h" - #include "video_core/rasterizer_interface.h" namespace Pica { @@ -17,14 +16,12 @@ struct OutputVertex; namespace VideoCore { class SWRasterizer : public RasterizerInterface { - void AddTriangle(const Pica::Shader::OutputVertex& v0, - const Pica::Shader::OutputVertex& v1, - const Pica::Shader::OutputVertex& v2) override; + void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, + const Pica::Shader::OutputVertex& v2) override; void DrawTriangles() override {} void NotifyPicaRegisterChanged(u32 id) override {} void FlushAll() override {} void FlushRegion(PAddr addr, u32 size) override {} void FlushAndInvalidateRegion(PAddr addr, u32 size) override {} }; - } diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index e40f0f1ee..2b8ef7018 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp @@ -1,16 +1,12 @@ #include <memory> - #include <boost/range/algorithm/fill.hpp> - #include "common/alignment.h" #include "common/assert.h" #include "common/bit_field.h" #include "common/common_types.h" #include "common/logging/log.h" #include "common/vector_math.h" - #include "core/memory.h" - #include "video_core/debug_utils/debug_utils.h" #include "video_core/pica.h" #include "video_core/pica_state.h" @@ -41,24 +37,32 @@ void VertexLoader::Setup(const Pica::Regs& regs) { // TODO: What happens if a loader overwrites a previous one's data? for (unsigned component = 0; component < loader_config.component_count; ++component) { if (component >= 12) { - LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); + LOG_ERROR(HW_GPU, + "Overflow in the vertex attribute loader %u trying to load component %u", + loader, component); continue; } u32 attribute_index = loader_config.GetComponent(component); if (attribute_index < 12) { - offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index)); + offset = Common::AlignUp(offset, + attribute_config.GetElementSizeInBytes(attribute_index)); vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; - vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); - vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); - vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); + vertex_attribute_strides[attribute_index] = + static_cast<u32>(loader_config.byte_count); + vertex_attribute_formats[attribute_index] = + attribute_config.GetFormat(attribute_index); + vertex_attribute_elements[attribute_index] = + attribute_config.GetNumElements(attribute_index); offset += attribute_config.GetStride(attribute_index); } else if (attribute_index < 16) { - // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively + // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, + // respectively offset = Common::AlignUp(offset, 4); offset += (attribute_index - 11) * 4; } else { - UNREACHABLE(); // This is truly unreachable due to the number of bits for each component + UNREACHABLE(); // This is truly unreachable due to the number of bits for each + // component } } } @@ -66,48 +70,55 @@ void VertexLoader::Setup(const Pica::Regs& regs) { is_setup = true; } -void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) { +void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, + DebugUtils::MemoryAccessTracker& memory_accesses) { ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices."); for (int i = 0; i < num_total_attributes; ++i) { if (vertex_attribute_elements[i] != 0) { // Load per-vertex data from the loader arrays - u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex; + u32 source_addr = + base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex; if (g_debug_context && Pica::g_debug_context->recorder) { - memory_accesses.AddAccess(source_addr, vertex_attribute_elements[i] * ( - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 - : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1)); + memory_accesses.AddAccess( + source_addr, + vertex_attribute_elements[i] * + ((vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) + ? 4 + : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) + ? 2 + : 1)); } switch (vertex_attribute_formats[i]) { - case Regs::VertexAttributeFormat::BYTE: - { - const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr)); + case Regs::VertexAttributeFormat::BYTE: { + const s8* srcdata = + reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr)); for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); } break; } - case Regs::VertexAttributeFormat::UBYTE: - { - const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr)); + case Regs::VertexAttributeFormat::UBYTE: { + const u8* srcdata = + reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr)); for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); } break; } - case Regs::VertexAttributeFormat::SHORT: - { - const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr)); + case Regs::VertexAttributeFormat::SHORT: { + const s16* srcdata = + reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr)); for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); } break; } - case Regs::VertexAttributeFormat::FLOAT: - { - const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr)); + case Regs::VertexAttributeFormat::FLOAT: { + const float* srcdata = + reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr)); for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); } @@ -119,22 +130,23 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I // is *not* carried over from the default attribute settings even if they're // enabled for this attribute. for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) { - input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); + input.attr[i][comp] = + comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); } - LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f", - vertex_attribute_elements[i], i, vertex, index, - base_address, - vertex_attribute_sources[i], - vertex_attribute_strides[i] * vertex, - input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); + LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from " + "0x%08x + 0x%08x + 0x%04x: %f %f %f %f", + vertex_attribute_elements[i], i, vertex, index, base_address, + vertex_attribute_sources[i], vertex_attribute_strides[i] * vertex, + input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), + input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); } else if (vertex_attribute_is_default[i]) { // Load the default attribute if we're configured to do so input.attr[i] = g_state.vs_default_attributes[i]; - LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", - i, vertex, index, - input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), - input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); + LOG_TRACE(HW_GPU, + "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", i, + vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), + input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); } else { // TODO(yuriks): In this case, no data gets loaded and the vertex // remains with the last value it had. This isn't currently maintained @@ -143,4 +155,4 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I } } -} // namespace Pica +} // namespace Pica diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h index ac162c254..9f2098bb2 100644 --- a/src/video_core/vertex_loader.h +++ b/src/video_core/vertex_loader.h @@ -1,7 +1,6 @@ #pragma once #include <array> - #include "common/common_types.h" #include "video_core/pica.h" @@ -23,9 +22,12 @@ public: } void Setup(const Pica::Regs& regs); - void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses); + void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, + DebugUtils::MemoryAccessTracker& memory_accesses); - int GetNumTotalAttributes() const { return num_total_attributes; } + int GetNumTotalAttributes() const { + return num_total_attributes; + } private: std::array<u32, 16> vertex_attribute_sources; @@ -37,4 +39,4 @@ private: bool is_setup = false; }; -} // namespace Pica +} // namespace Pica diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index bd6e5eb6b..83e33dfc2 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -3,21 +3,19 @@ // Refer to the license.txt file included. #include <memory> - #include "common/logging/log.h" - #include "video_core/pica.h" #include "video_core/renderer_base.h" -#include "video_core/video_core.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/video_core.h" //////////////////////////////////////////////////////////////////////////////////////////////////// // Video Core namespace namespace VideoCore { -EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window -std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin +EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window +std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin std::atomic<bool> g_hw_renderer_enabled; std::atomic<bool> g_shader_jit_enabled; diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index 30267489e..e2d725ab1 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -22,18 +22,19 @@ namespace VideoCore { // framebuffers in video memory are stored in column-major order and rendered sideways, causing // the widths and heights of the framebuffers read by the LCD to be switched compared to the // heights and widths of the screens listed here. -static const int kScreenTopWidth = 400; ///< 3DS top screen width -static const int kScreenTopHeight = 240; ///< 3DS top screen height -static const int kScreenBottomWidth = 320; ///< 3DS bottom screen width -static const int kScreenBottomHeight = 240; ///< 3DS bottom screen height +static const int kScreenTopWidth = 400; ///< 3DS top screen width +static const int kScreenTopHeight = 240; ///< 3DS top screen height +static const int kScreenBottomWidth = 320; ///< 3DS bottom screen width +static const int kScreenBottomHeight = 240; ///< 3DS bottom screen height // Video core renderer // --------------------- -extern std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin -extern EmuWindow* g_emu_window; ///< Emu window +extern std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin +extern EmuWindow* g_emu_window; ///< Emu window -// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui) +// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from +// qt ui) extern std::atomic<bool> g_hw_renderer_enabled; extern std::atomic<bool> g_shader_jit_enabled; extern std::atomic<bool> g_scaled_resolution_enabled; |