diff options
Diffstat (limited to 'src/video_core')
45 files changed, 3148 insertions, 1644 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 09ecc5bad..a780215c1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -21,6 +21,7 @@ add_library(video_core STATIC macro_interpreter.h memory_manager.cpp memory_manager.h + rasterizer_cache.cpp rasterizer_cache.h rasterizer_interface.h renderer_base.cpp @@ -33,6 +34,7 @@ add_library(video_core STATIC renderer_opengl/gl_rasterizer.h renderer_opengl/gl_rasterizer_cache.cpp renderer_opengl/gl_rasterizer_cache.h + renderer_opengl/gl_resource_manager.cpp renderer_opengl/gl_resource_manager.h renderer_opengl/gl_shader_cache.cpp renderer_opengl/gl_shader_cache.h @@ -51,6 +53,10 @@ add_library(video_core STATIC renderer_opengl/maxwell_to_gl.h renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.h + renderer_opengl/utils.cpp + renderer_opengl/utils.h + surface.cpp + surface.h textures/astc.cpp textures/astc.h textures/decoders.cpp diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index f1aa6091b..28e8c13aa 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -81,7 +81,7 @@ void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { for (auto entry : commands) { Tegra::GPUVAddr address = entry.Address(); u32 size = entry.sz; - const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address); + const std::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address); VAddr current_addr = *head_address; while (current_addr < *head_address + size * sizeof(CommandHeader)) { const CommandHeader header = {Memory::Read32(current_addr)}; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index bca014a4a..6de07ea56 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include <cinttypes> +#include <cstring> #include "common/assert.h" #include "core/core.h" #include "core/core_timing.h" @@ -19,21 +20,69 @@ namespace Tegra::Engines { constexpr u32 MacroRegistersStart = 0xE00; Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {} + : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) { + InitializeRegisterDefaults(); +} + +void Maxwell3D::InitializeRegisterDefaults() { + // Initializes registers to their default values - what games expect them to be at boot. This is + // for certain registers that may not be explicitly set by games. + + // Reset all registers to zero + std::memset(&regs, 0, sizeof(regs)); + + // Depth range near/far is not always set, but is expected to be the default 0.0f, 1.0f. This is + // needed for ARMS. + for (std::size_t viewport{}; viewport < Regs::NumViewports; ++viewport) { + regs.viewport[viewport].depth_range_near = 0.0f; + regs.viewport[viewport].depth_range_far = 1.0f; + } + // Doom and Bomberman seems to use the uninitialized registers and just enable blend + // so initialize blend registers with sane values + regs.blend.equation_rgb = Regs::Blend::Equation::Add; + regs.blend.factor_source_rgb = Regs::Blend::Factor::One; + regs.blend.factor_dest_rgb = Regs::Blend::Factor::Zero; + regs.blend.equation_a = Regs::Blend::Equation::Add; + regs.blend.factor_source_a = Regs::Blend::Factor::One; + regs.blend.factor_dest_a = Regs::Blend::Factor::Zero; + for (std::size_t blend_index = 0; blend_index < Regs::NumRenderTargets; blend_index++) { + regs.independent_blend[blend_index].equation_rgb = Regs::Blend::Equation::Add; + regs.independent_blend[blend_index].factor_source_rgb = Regs::Blend::Factor::One; + regs.independent_blend[blend_index].factor_dest_rgb = Regs::Blend::Factor::Zero; + regs.independent_blend[blend_index].equation_a = 
Regs::Blend::Equation::Add; + regs.independent_blend[blend_index].factor_source_a = Regs::Blend::Factor::One; + regs.independent_blend[blend_index].factor_dest_a = Regs::Blend::Factor::Zero; + } + regs.stencil_front_op_fail = Regs::StencilOp::Keep; + regs.stencil_front_op_zfail = Regs::StencilOp::Keep; + regs.stencil_front_op_zpass = Regs::StencilOp::Keep; + regs.stencil_front_func_func = Regs::ComparisonOp::Always; + regs.stencil_front_func_mask = 0xFFFFFFFF; + regs.stencil_front_mask = 0xFFFFFFFF; + regs.stencil_two_side_enable = 1; + regs.stencil_back_op_fail = Regs::StencilOp::Keep; + regs.stencil_back_op_zfail = Regs::StencilOp::Keep; + regs.stencil_back_op_zpass = Regs::StencilOp::Keep; + regs.stencil_back_func_func = Regs::ComparisonOp::Always; + regs.stencil_back_func_mask = 0xFFFFFFFF; + regs.stencil_back_mask = 0xFFFFFFFF; +} void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { // Reset the current macro. executing_macro = 0; - // The requested macro must have been uploaded already. - auto macro_code = uploaded_macros.find(method); - if (macro_code == uploaded_macros.end()) { - LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method); + // Lookup the macro offset + const u32 entry{(method - MacroRegistersStart) >> 1}; + const auto& search{macro_offsets.find(entry)}; + if (search == macro_offsets.end()) { + LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method); + UNREACHABLE(); return; } // Execute the current macro. 
- macro_interpreter.Execute(macro_code->second, std::move(parameters)); + macro_interpreter.Execute(search->second, std::move(parameters)); } void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { @@ -72,13 +121,23 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); } - regs.reg_array[method] = value; + if (regs.reg_array[method] != value) { + regs.reg_array[method] = value; + if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && + method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { + dirty_flags.vertex_attrib_format = true; + } + } switch (method) { case MAXWELL3D_REG_INDEX(macros.data): { ProcessMacroUpload(value); break; } + case MAXWELL3D_REG_INDEX(macros.bind): { + ProcessMacroBind(value); + break; + } case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): @@ -140,22 +199,25 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { } void Maxwell3D::ProcessMacroUpload(u32 data) { - // Store the uploaded macro code to interpret them when they're called. - auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart]; - macro.push_back(data); + ASSERT_MSG(regs.macros.upload_address < macro_memory.size(), + "upload_address exceeded macro_memory size!"); + macro_memory[regs.macros.upload_address++] = data; +} + +void Maxwell3D::ProcessMacroBind(u32 data) { + macro_offsets[regs.macros.entry] = data; } void Maxwell3D::ProcessQueryGet() { GPUVAddr sequence_address = regs.query.QueryAddress(); // Since the sequence address is given as a GPU VAddr, we have to convert it to an application // VAddr before writing. 
- boost::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address); + std::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address); // TODO(Subv): Support the other query units. ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, "Units other than CROP are unimplemented"); - u32 value = Memory::Read32(*address); u64 result = 0; // TODO(Subv): Support the other query variables @@ -268,7 +330,7 @@ void Maxwell3D::ProcessCBData(u32 value) { // Don't allow writing past the end of the buffer. ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); - boost::optional<VAddr> address = + std::optional<VAddr> address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); Memory::Write32(*address, value); @@ -281,7 +343,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { GPUVAddr tic_base_address = regs.tic.TICAddress(); GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry); - boost::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu); + std::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu); Texture::TICEntry tic_entry; Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); @@ -305,7 +367,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry); - boost::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu); + std::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu); Texture::TSCEntry tsc_entry; Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); @@ -369,7 +431,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); - 
boost::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address); + std::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address); Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)}; Texture::FullTextureInfo tex_info{}; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0e09a7ee5..91ca57883 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -345,6 +345,14 @@ public: Invert = 6, IncrWrap = 7, DecrWrap = 8, + KeepOGL = 0x1E00, + ZeroOGL = 0, + ReplaceOGL = 0x1E01, + IncrOGL = 0x1E02, + DecrOGL = 0x1E03, + InvertOGL = 0x150A, + IncrWrapOGL = 0x8507, + DecrWrapOGL = 0x8508, }; enum class MemoryLayout : u32 { @@ -462,6 +470,16 @@ public: } }; + struct ColorMask { + union { + u32 raw; + BitField<0, 4, u32> R; + BitField<4, 4, u32> G; + BitField<8, 4, u32> B; + BitField<12, 4, u32> A; + }; + }; + bool IsShaderConfigEnabled(std::size_t index) const { // The VertexB is always enabled. 
if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) { @@ -475,12 +493,13 @@ public: INSERT_PADDING_WORDS(0x45); struct { - INSERT_PADDING_WORDS(1); + u32 upload_address; u32 data; u32 entry; + u32 bind; } macros; - INSERT_PADDING_WORDS(0x189); + INSERT_PADDING_WORDS(0x188); u32 tfb_enabled; @@ -570,7 +589,11 @@ public: u32 stencil_back_mask; u32 stencil_back_func_mask; - INSERT_PADDING_WORDS(0x13); + INSERT_PADDING_WORDS(0xC); + + u32 color_mask_common; + + INSERT_PADDING_WORDS(0x6); u32 rt_separate_frag_data; @@ -645,8 +668,14 @@ public: ComparisonOp depth_test_func; float alpha_test_ref; ComparisonOp alpha_test_func; - - INSERT_PADDING_WORDS(0x9); + u32 draw_tfb_stride; + struct { + float r; + float g; + float b; + float a; + } blend_color; + INSERT_PADDING_WORDS(0x4); struct { u32 separate_alpha; @@ -723,7 +752,11 @@ public: StencilOp stencil_back_op_zpass; ComparisonOp stencil_back_func_func; - INSERT_PADDING_WORDS(0x17); + INSERT_PADDING_WORDS(0x4); + + u32 framebuffer_srgb; + + INSERT_PADDING_WORDS(0x12); union { BitField<2, 1, u32> coord_origin; @@ -751,7 +784,14 @@ public: }; } draw; - INSERT_PADDING_WORDS(0x6B); + INSERT_PADDING_WORDS(0xA); + + struct { + u32 enabled; + u32 index; + } primitive_restart; + + INSERT_PADDING_WORDS(0x5F); struct { u32 start_addr_high; @@ -829,8 +869,9 @@ public: BitField<6, 4, u32> RT; BitField<10, 11, u32> layer; } clear_buffers; - - INSERT_PADDING_WORDS(0x4B); + INSERT_PADDING_WORDS(0xB); + std::array<ColorMask, NumRenderTargets> color_mask; + INSERT_PADDING_WORDS(0x38); struct { u32 query_address_high; @@ -971,6 +1012,12 @@ public: State state{}; MemoryManager& memory_manager; + struct DirtyFlags { + bool vertex_attrib_format = true; + }; + + DirtyFlags dirty_flags; + /// Reads a register value located at the input method address u32 GetRegisterValue(u32 method) const; @@ -983,10 +1030,25 @@ public: /// Returns the texture information for a specific texture in a specific shader stage. 
Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; + /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than + /// we've seen used. + using MacroMemory = std::array<u32, 0x40000>; + + /// Gets a reference to macro memory. + const MacroMemory& GetMacroMemory() const { + return macro_memory; + } + private: + void InitializeRegisterDefaults(); + VideoCore::RasterizerInterface& rasterizer; - std::unordered_map<u32, std::vector<u32>> uploaded_macros; + /// Start offsets of each macro in macro_memory + std::unordered_map<u32, u32> macro_offsets; + + /// Memory for macro code + MacroMemory macro_memory; /// Macro method that is currently being executed / being fed parameters. u32 executing_macro = 0; @@ -1009,9 +1071,12 @@ private: */ void CallMacroMethod(u32 method, std::vector<u32> parameters); - /// Handles writes to the macro uploading registers. + /// Handles writes to the macro uploading register. void ProcessMacroUpload(u32 data); + /// Handles writes to the macro bind register. + void ProcessMacroBind(u32 data); + /// Handles a write to the CLEAR_BUFFERS register. 
void ProcessClearBuffers(); @@ -1045,6 +1110,7 @@ ASSERT_REG_POSITION(scissor_test, 0x380); ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); +ASSERT_REG_POSITION(color_mask_common, 0x3E4); ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); @@ -1057,6 +1123,10 @@ ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); ASSERT_REG_POSITION(alpha_test_enabled, 0x4BB); ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2); ASSERT_REG_POSITION(depth_test_func, 0x4C3); +ASSERT_REG_POSITION(alpha_test_ref, 0x4C4); +ASSERT_REG_POSITION(alpha_test_func, 0x4C5); +ASSERT_REG_POSITION(draw_tfb_stride, 0x4C6); +ASSERT_REG_POSITION(blend_color, 0x4C7); ASSERT_REG_POSITION(blend, 0x4CF); ASSERT_REG_POSITION(stencil_enable, 0x4E0); ASSERT_REG_POSITION(stencil_front_op_fail, 0x4E1); @@ -1077,14 +1147,17 @@ ASSERT_REG_POSITION(stencil_back_op_fail, 0x566); ASSERT_REG_POSITION(stencil_back_op_zfail, 0x567); ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568); ASSERT_REG_POSITION(stencil_back_func_func, 0x569); +ASSERT_REG_POSITION(framebuffer_srgb, 0x56E); ASSERT_REG_POSITION(point_coord_replace, 0x581); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); +ASSERT_REG_POSITION(primitive_restart, 0x591); ASSERT_REG_POSITION(index_array, 0x5F2); ASSERT_REG_POSITION(instanced_arrays, 0x620); ASSERT_REG_POSITION(cull, 0x646); ASSERT_REG_POSITION(logic_op, 0x671); ASSERT_REG_POSITION(clear_buffers, 0x674); +ASSERT_REG_POSITION(color_mask, 0x680); ASSERT_REG_POSITION(query, 0x6C0); ASSERT_REG_POSITION(vertex_array[0], 0x700); ASSERT_REG_POSITION(independent_blend, 0x780); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 6cd08d28b..83a6fd875 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -5,12 +5,11 
@@ #pragma once #include <bitset> +#include <optional> #include <string> #include <tuple> #include <vector> -#include <boost/optional.hpp> - #include "common/assert.h" #include "common/bit_field.h" #include "common/common_types.h" @@ -79,6 +78,7 @@ union Attribute { constexpr explicit Attribute(u64 value) : value(value) {} enum class Index : u64 { + PointSize = 6, Position = 7, Attribute_0 = 8, Attribute_31 = 39, @@ -207,6 +207,16 @@ enum class UniformType : u64 { Double = 5, }; +enum class StoreType : u64 { + Unsigned8 = 0, + Signed8 = 1, + Unsigned16 = 2, + Signed16 = 3, + Bytes32 = 4, + Bytes64 = 5, + Bytes128 = 6, +}; + enum class IMinMaxExchange : u64 { None = 0, XLo = 1, @@ -568,6 +578,10 @@ union Instruction { } fmul32; union { + BitField<52, 1, u64> generates_cc; + } op_32; + + union { BitField<48, 1, u64> is_signed; } shift; @@ -747,6 +761,18 @@ union Instruction { } ld_c; union { + BitField<48, 3, StoreType> type; + } ldst_sl; + + union { + BitField<44, 2, u64> unknown; + } ld_l; + + union { + BitField<44, 2, u64> unknown; + } st_l; + + union { BitField<0, 3, u64> pred0; BitField<3, 3, u64> pred3; BitField<7, 1, u64> abs_a; @@ -1208,6 +1234,8 @@ union Instruction { BitField<61, 1, u64> is_b_imm; BitField<60, 1, u64> is_b_gpr; BitField<59, 1, u64> is_c_gpr; + BitField<20, 24, s64> smem_imm; + BitField<0, 5, ControlCode> flow_control_code; Attribute attribute; Sampler sampler; @@ -1231,8 +1259,12 @@ public: BRA, PBK, LD_A, + LD_L, + LD_S, LD_C, ST_A, + ST_L, + ST_S, LDG, // Load from global memory STG, // Store in global memory TEX, @@ -1428,7 +1460,7 @@ public: Type type; }; - static boost::optional<const Matcher&> Decode(Instruction instr) { + static std::optional<std::reference_wrapper<const Matcher>> Decode(Instruction instr) { static const auto table{GetDecodeTable()}; const auto matches_instruction = [instr](const auto& matcher) { @@ -1436,7 +1468,8 @@ public: }; auto iter = std::find_if(table.begin(), table.end(), matches_instruction); - return iter 
!= table.end() ? boost::optional<const Matcher&>(*iter) : boost::none; + return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter) + : std::nullopt; } private: @@ -1489,8 +1522,12 @@ private: INST("111000110100---", Id::BRK, Type::Flow, "BRK"), INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), + INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), + INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), + INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), + INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("110000----111---", Id::TEX, Type::Memory, "TEX"), @@ -1626,4 +1663,4 @@ private: } }; -} // namespace Tegra::Shader
\ No newline at end of file +} // namespace Tegra::Shader diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index a885ee3cf..a0e015c4b 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h @@ -96,6 +96,11 @@ struct Header { } } ps; }; + + u64 GetLocalMemorySize() { + return (common1.shader_local_memory_low_size | + (common2.shader_local_memory_high_size << 24)); + } }; static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index 377bd66ab..335a8d407 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -11,7 +11,7 @@ namespace Tegra { MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} -void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> parameters) { +void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { Reset(); registers[1] = parameters[0]; this->parameters = std::move(parameters); @@ -19,7 +19,7 @@ void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> pa // Execute the code until we hit an exit condition. 
bool keep_executing = true; while (keep_executing) { - keep_executing = Step(code, false); + keep_executing = Step(offset, false); } // Assert the the macro used all the input parameters @@ -29,7 +29,7 @@ void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> pa void MacroInterpreter::Reset() { registers = {}; pc = 0; - delayed_pc = boost::none; + delayed_pc = {}; method_address.raw = 0; parameters.clear(); // The next parameter index starts at 1, because $r1 already has the value of the first @@ -37,17 +37,17 @@ void MacroInterpreter::Reset() { next_parameter_index = 1; } -bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) { +bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { u32 base_address = pc; - Opcode opcode = GetOpcode(code); + Opcode opcode = GetOpcode(offset); pc += 4; // Update the program counter if we were delayed - if (delayed_pc != boost::none) { + if (delayed_pc) { ASSERT(is_delay_slot); pc = *delayed_pc; - delayed_pc = boost::none; + delayed_pc = {}; } switch (opcode.operation) { @@ -108,7 +108,7 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) { delayed_pc = base_address + opcode.GetBranchTarget(); // Execute one more instruction due to the delay slot. - return Step(code, true); + return Step(offset, true); } break; } @@ -121,17 +121,18 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) { // Exit has a delay slot, execute the next instruction // Note: Executing an exit during a branch delay slot will cause the instruction at the // branch target to be executed before exiting. 
- Step(code, true); + Step(offset, true); return false; } return true; } -MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector<u32>& code) const { +MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const { + const auto& macro_memory{maxwell3d.GetMacroMemory()}; ASSERT((pc % sizeof(u32)) == 0); - ASSERT(pc < code.size() * sizeof(u32)); - return {code[pc / sizeof(u32)]}; + ASSERT((pc + offset) < macro_memory.size() * sizeof(u32)); + return {macro_memory[offset + pc / sizeof(u32)]}; } u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const { diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h index cee0baaf3..62d1ce289 100644 --- a/src/video_core/macro_interpreter.h +++ b/src/video_core/macro_interpreter.h @@ -5,8 +5,9 @@ #pragma once #include <array> +#include <optional> #include <vector> -#include <boost/optional.hpp> + #include "common/bit_field.h" #include "common/common_types.h" @@ -21,10 +22,10 @@ public: /** * Executes the macro code with the specified input parameters. - * @param code The macro byte code to execute - * @param parameters The parameters of the macro + * @param offset Offset to start execution at. + * @param parameters The parameters of the macro. */ - void Execute(const std::vector<u32>& code, std::vector<u32> parameters); + void Execute(u32 offset, std::vector<u32> parameters); private: enum class Operation : u32 { @@ -109,11 +110,11 @@ private: /** * Executes a single macro instruction located at the current program counter. Returns whether * the interpreter should keep running. - * @param code The macro code to execute. + * @param offset Offset to start execution at. * @param is_delay_slot Whether the current step is being executed due to a delay slot in a * previous instruction. */ - bool Step(const std::vector<u32>& code, bool is_delay_slot); + bool Step(u32 offset, bool is_delay_slot); /// Calculates the result of an ALU operation. 
src_a OP src_b; u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const; @@ -126,7 +127,7 @@ private: bool EvaluateBranchCondition(BranchCondition cond, u32 value) const; /// Reads an opcode at the current program counter location. - Opcode GetOpcode(const std::vector<u32>& code) const; + Opcode GetOpcode(u32 offset) const; /// Returns the specified register's value. Register 0 is hardcoded to always return 0. u32 GetRegister(u32 register_id) const; @@ -149,7 +150,7 @@ private: Engines::Maxwell3D& maxwell3d; u32 pc; ///< Current program counter - boost::optional<u32> + std::optional<u32> delayed_pc; ///< Program counter to execute at after the delay slot is executed. static constexpr std::size_t NumMacroRegisters = 8; diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 022d4ab74..77a20bb84 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -4,18 +4,21 @@ #include "common/alignment.h" #include "common/assert.h" +#include "common/logging/log.h" #include "video_core/memory_manager.h" namespace Tegra { GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { - boost::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, align); - ASSERT(gpu_addr); + const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)}; - for (u64 offset = 0; offset < size; offset += PAGE_SIZE) { - VAddr& slot = PageSlot(*gpu_addr + offset); + ASSERT_MSG(gpu_addr, "unable to find available GPU memory"); + + for (u64 offset{}; offset < size; offset += PAGE_SIZE) { + VAddr& slot{PageSlot(*gpu_addr + offset)}; ASSERT(slot == static_cast<u64>(PageStatus::Unmapped)); + slot = static_cast<u64>(PageStatus::Allocated); } @@ -23,10 +26,11 @@ GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { } GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) { - for (u64 offset = 0; offset < size; offset += PAGE_SIZE) { - VAddr& slot = PageSlot(gpu_addr + offset); 
+ for (u64 offset{}; offset < size; offset += PAGE_SIZE) { + VAddr& slot{PageSlot(gpu_addr + offset)}; ASSERT(slot == static_cast<u64>(PageStatus::Unmapped)); + slot = static_cast<u64>(PageStatus::Allocated); } @@ -34,17 +38,19 @@ GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) { } GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { - boost::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, PAGE_SIZE); - ASSERT(gpu_addr); + const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, PAGE_SIZE, PageStatus::Unmapped)}; + + ASSERT_MSG(gpu_addr, "unable to find available GPU memory"); - for (u64 offset = 0; offset < size; offset += PAGE_SIZE) { - VAddr& slot = PageSlot(*gpu_addr + offset); + for (u64 offset{}; offset < size; offset += PAGE_SIZE) { + VAddr& slot{PageSlot(*gpu_addr + offset)}; ASSERT(slot == static_cast<u64>(PageStatus::Unmapped)); + slot = cpu_addr + offset; } - MappedRegion region{cpu_addr, *gpu_addr, size}; + const MappedRegion region{cpu_addr, *gpu_addr, size}; mapped_regions.push_back(region); return *gpu_addr; @@ -53,14 +59,31 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) { ASSERT((gpu_addr & PAGE_MASK) == 0); - for (u64 offset = 0; offset < size; offset += PAGE_SIZE) { - VAddr& slot = PageSlot(gpu_addr + offset); + if (PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Allocated)) { + // Page has been already mapped. In this case, we must find a new area of memory to use that + // is different than the specified one. Super Mario Odyssey hits this scenario when changing + // areas, but we do not want to overwrite the old pages. + // TODO(bunnei): We need to write a hardware test to confirm this behavior. 
+ + LOG_ERROR(HW_GPU, "attempting to map addr 0x{:016X}, which is not available!", gpu_addr); + + const std::optional<GPUVAddr> new_gpu_addr{ + FindFreeBlock(gpu_addr, size, PAGE_SIZE, PageStatus::Allocated)}; + + ASSERT_MSG(new_gpu_addr, "unable to find available GPU memory"); + + gpu_addr = *new_gpu_addr; + } + + for (u64 offset{}; offset < size; offset += PAGE_SIZE) { + VAddr& slot{PageSlot(gpu_addr + offset)}; ASSERT(slot == static_cast<u64>(PageStatus::Allocated)); + slot = cpu_addr + offset; } - MappedRegion region{cpu_addr, gpu_addr, size}; + const MappedRegion region{cpu_addr, gpu_addr, size}; mapped_regions.push_back(region); return gpu_addr; @@ -69,11 +92,12 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { ASSERT((gpu_addr & PAGE_MASK) == 0); - for (u64 offset = 0; offset < size; offset += PAGE_SIZE) { - VAddr& slot = PageSlot(gpu_addr + offset); + for (u64 offset{}; offset < size; offset += PAGE_SIZE) { + VAddr& slot{PageSlot(gpu_addr + offset)}; ASSERT(slot != static_cast<u64>(PageStatus::Allocated) && slot != static_cast<u64>(PageStatus::Unmapped)); + slot = static_cast<u64>(PageStatus::Unmapped); } @@ -97,13 +121,14 @@ GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const { return {}; } -boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) { - GPUVAddr gpu_addr = 0; - u64 free_space = 0; +std::optional<GPUVAddr> MemoryManager::FindFreeBlock(GPUVAddr region_start, u64 size, u64 align, + PageStatus status) { + GPUVAddr gpu_addr{region_start}; + u64 free_space{}; align = (align + PAGE_MASK) & ~PAGE_MASK; while (gpu_addr + free_space < MAX_ADDRESS) { - if (!IsPageMapped(gpu_addr + free_space)) { + if (PageSlot(gpu_addr + free_space) == static_cast<u64>(status)) { free_space += PAGE_SIZE; if (free_space >= size) { return gpu_addr; @@ -118,8 +143,8 @@ boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 
align) { return {}; } -boost::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) { - VAddr base_addr = PageSlot(gpu_addr); +std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) { + const VAddr base_addr{PageSlot(gpu_addr)}; if (base_addr == static_cast<u64>(PageStatus::Allocated) || base_addr == static_cast<u64>(PageStatus::Unmapped)) { @@ -133,19 +158,15 @@ std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const { std::vector<GPUVAddr> results; for (const auto& region : mapped_regions) { if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) { - u64 offset = cpu_addr - region.cpu_addr; + const u64 offset{cpu_addr - region.cpu_addr}; results.push_back(region.gpu_addr + offset); } } return results; } -bool MemoryManager::IsPageMapped(GPUVAddr gpu_addr) { - return PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Unmapped); -} - VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) { - auto& block = page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]; + auto& block{page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]}; if (!block) { block = std::make_unique<PageBlock>(); block->fill(static_cast<VAddr>(PageStatus::Unmapped)); diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index caf80093f..4eb338aa2 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -6,10 +6,9 @@ #include <array> #include <memory> +#include <optional> #include <vector> -#include <boost/optional.hpp> - #include "common/common_types.h" namespace Tegra { @@ -27,7 +26,7 @@ public: GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size); GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size); GPUVAddr GetRegionEnd(GPUVAddr region_start) const; - boost::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr); + std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr); std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const; 
static constexpr u64 PAGE_BITS = 16; @@ -35,15 +34,15 @@ public: static constexpr u64 PAGE_MASK = PAGE_SIZE - 1; private: - boost::optional<GPUVAddr> FindFreeBlock(u64 size, u64 align = 1); - bool IsPageMapped(GPUVAddr gpu_addr); - VAddr& PageSlot(GPUVAddr gpu_addr); - enum class PageStatus : u64 { Unmapped = 0xFFFFFFFFFFFFFFFFULL, Allocated = 0xFFFFFFFFFFFFFFFEULL, }; + std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align, + PageStatus status); + VAddr& PageSlot(GPUVAddr gpu_addr); + static constexpr u64 MAX_ADDRESS{0x10000000000ULL}; static constexpr u64 PAGE_TABLE_BITS{10}; static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS}; diff --git a/src/video_core/rasterizer_cache.cpp b/src/video_core/rasterizer_cache.cpp new file mode 100644 index 000000000..093b2cdf4 --- /dev/null +++ b/src/video_core/rasterizer_cache.cpp @@ -0,0 +1,7 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "video_core/rasterizer_cache.h" + +RasterizerCacheObject::~RasterizerCacheObject() = default; diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index 0a3b3951e..bcf0c15a4 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h @@ -5,18 +5,19 @@ #pragma once #include <set> +#include <unordered_map> #include <boost/icl/interval_map.hpp> #include <boost/range/iterator_range_core.hpp> #include "common/common_types.h" -#include "core/core.h" #include "core/settings.h" #include "video_core/rasterizer_interface.h" -#include "video_core/renderer_base.h" class RasterizerCacheObject { public: + virtual ~RasterizerCacheObject(); + /// Gets the address of the shader in guest memory, required for cache management virtual VAddr GetAddr() const = 0; @@ -64,6 +65,8 @@ class RasterizerCache : NonCopyable { friend class RasterizerCacheObject; public: + explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} + /// Write any cached resources overlapping the specified region back to memory void FlushRegion(Tegra::GPUVAddr addr, size_t size) { const auto& objects{GetSortedObjectsFromRegion(addr, size)}; @@ -86,45 +89,39 @@ public: /// Invalidates everything in the cache void InvalidateAll() { - while (object_cache.begin() != object_cache.end()) { - Unregister(*object_cache.begin()->second.begin()); + while (interval_cache.begin() != interval_cache.end()) { + Unregister(*interval_cache.begin()->second.begin()); } } protected: /// Tries to get an object from the cache with the specified address T TryGet(VAddr addr) const { - const ObjectInterval interval{addr}; - for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) { - for (auto& cached_object : pair.second) { - if (cached_object->GetAddr() == addr) { - return cached_object; - } - } - } + const auto iter = map_cache.find(addr); + if (iter != map_cache.end()) + return iter->second; return 
nullptr; } /// Register an object into the cache void Register(const T& object) { object->SetIsRegistered(true); - object_cache.add({GetInterval(object), ObjectSet{object}}); - auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer(); + interval_cache.add({GetInterval(object), ObjectSet{object}}); + map_cache.insert({object->GetAddr(), object}); rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1); } /// Unregisters an object from the cache void Unregister(const T& object) { object->SetIsRegistered(false); - auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer(); rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1); - // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit if (Settings::values.use_accurate_gpu_emulation) { FlushObject(object); } - object_cache.subtract({GetInterval(object), ObjectSet{object}}); + interval_cache.subtract({GetInterval(object), ObjectSet{object}}); + map_cache.erase(object->GetAddr()); } /// Returns a ticks counter used for tracking when cached objects were last modified @@ -141,7 +138,7 @@ private: std::vector<T> objects; const ObjectInterval interval{addr, addr + size}; - for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) { + for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) { for (auto& cached_object : pair.second) { if (!cached_object) { continue; @@ -167,14 +164,17 @@ private: } using ObjectSet = std::set<T>; - using ObjectCache = boost::icl::interval_map<VAddr, ObjectSet>; - using ObjectInterval = typename ObjectCache::interval_type; + using ObjectCache = std::unordered_map<VAddr, T>; + using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; + using ObjectInterval = typename IntervalCache::interval_type; static auto GetInterval(const T& object) { return ObjectInterval::right_open(object->GetAddr(), object->GetAddr() + 
object->GetSizeInBytes()); } - ObjectCache object_cache; ///< Cache of objects - u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing + ObjectCache map_cache; + IntervalCache interval_cache; ///< Cache of objects + u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing + VideoCore::RasterizerInterface& rasterizer; }; diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 0df3725c2..1482cdb40 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -5,7 +5,6 @@ #include "core/frontend/emu_window.h" #include "core/settings.h" #include "video_core/renderer_base.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" namespace VideoCore { diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 2cd0738ff..669e26e15 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -6,7 +6,8 @@ #include <atomic> #include <memory> -#include <boost/optional.hpp> +#include <optional> + #include "common/common_types.h" #include "video_core/gpu.h" #include "video_core/rasterizer_interface.h" @@ -28,7 +29,8 @@ public: virtual ~RendererBase(); /// Swap buffers (render frame) - virtual void SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) = 0; + virtual void SwapBuffers( + std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0; /// Initialize the renderer virtual bool Init() = 0; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index c142095c5..075192c3f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -9,15 +9,17 @@ #include "core/core.h" #include "core/memory.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" namespace OpenGL { 
-OGLBufferCache::OGLBufferCache(std::size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {} +OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) + : RasterizerCache{rasterizer}, stream_buffer(GL_ARRAY_BUFFER, size) {} GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment, bool cache) { auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); - const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; + const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; // Cache management is a big overhead, so only cache entries with a given size. // TODO: Figure out which size is the best for given games. diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index be29dc8be..91fca3f6c 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -15,6 +15,8 @@ namespace OpenGL { +class RasterizerOpenGL; + struct CachedBufferEntry final : public RasterizerCacheObject { VAddr GetAddr() const override { return addr; @@ -35,7 +37,7 @@ struct CachedBufferEntry final : public RasterizerCacheObject { class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { public: - explicit OGLBufferCache(std::size_t size); + explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been /// allocated. 
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp index ee1d9601b..d9ed08437 100644 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp +++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp @@ -6,6 +6,7 @@ #include <array> #include "common/assert.h" #include "common/common_types.h" +#include "core/core.h" #include "core/memory.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_primitive_assembler.h" @@ -45,7 +46,7 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); - const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; + const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; const u8* source{Memory::GetPointer(*cpu_addr)}; for (u32 primitive = 0; primitive < count / 4; ++primitive) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index be51c5215..84bd91eed 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -30,10 +30,11 @@ namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using PixelFormat = SurfaceParams::PixelFormat; -using SurfaceType = SurfaceParams::SurfaceType; +using PixelFormat = VideoCore::Surface::PixelFormat; +using SurfaceType = VideoCore::Surface::SurfaceType; -MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(128, 128, 192)); +MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192)); +MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192)); 
MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192)); @@ -79,7 +80,8 @@ struct DrawParameters { }; RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) - : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) { + : res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info}, + buffer_cache(*this, STREAM_BUFFER_SIZE) { // Create sampler objects for (std::size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); @@ -104,9 +106,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo } ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported"); - - // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 - state.clip_distance[0] = true; + OpenGLState::ApplyDefaultState(); // Create render framebuffer framebuffer.Create(); @@ -115,8 +115,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo state.draw.shader_program = 0; state.Apply(); - glEnable(GL_BLEND); - glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); @@ -124,18 +122,23 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo RasterizerOpenGL::~RasterizerOpenGL() {} -void RasterizerOpenGL::SetupVertexArrays() { - MICROPROFILE_SCOPE(OpenGL_VAO); - const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); +void RasterizerOpenGL::SetupVertexFormat() { + auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); const auto& regs = gpu.regs; + if (!gpu.dirty_flags.vertex_attrib_format) + return; + gpu.dirty_flags.vertex_attrib_format = false; + + MICROPROFILE_SCOPE(OpenGL_VAO); + auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format); 
auto& VAO = iter->second; if (is_cache_miss) { VAO.Create(); state.draw.vertex_array = VAO.handle; - state.Apply(); + state.ApplyVertexBufferState(); // The index buffer binding is stored within the VAO. Stupid OpenGL, but easy to work // around. @@ -177,8 +180,13 @@ void RasterizerOpenGL::SetupVertexArrays() { } } state.draw.vertex_array = VAO.handle; - state.draw.vertex_buffer = buffer_cache.GetHandle(); - state.Apply(); + state.ApplyVertexBufferState(); +} + +void RasterizerOpenGL::SetupVertexBuffer() { + MICROPROFILE_SCOPE(OpenGL_VB); + const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + const auto& regs = gpu.regs; // Upload all guest vertex arrays sequentially to our buffer for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { @@ -205,6 +213,9 @@ void RasterizerOpenGL::SetupVertexArrays() { glVertexBindingDivisor(index, 0); } } + + // Implicit set by glBindVertexBuffer. Stupid glstate handling... + state.draw.vertex_buffer = buffer_cache.GetHandle(); } DrawParameters RasterizerOpenGL::SetupDraw() { @@ -329,8 +340,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { index++; } } - - state.Apply(); } std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { @@ -399,9 +408,9 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { cached_pages.add({pages_interval, delta}); } -void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb, - bool preserve_contents, - boost::optional<std::size_t> single_color_target) { +void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool using_color_fb, + bool using_depth_fb, bool preserve_contents, + std::optional<std::size_t> single_color_target) { MICROPROFILE_SCOPE(OpenGL_Framebuffer); const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; @@ -416,8 +425,9 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep ASSERT_MSG(regs.rt_separate_frag_data == 0, 
"Unimplemented"); // Bind the framebuffer surfaces - state.draw.draw_framebuffer = framebuffer.handle; - state.Apply(); + current_state.draw.draw_framebuffer = framebuffer.handle; + current_state.ApplyFramebufferState(); + current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0; if (using_color_fb) { if (single_color_target) { @@ -429,6 +439,9 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep // Assume that a surface will be written to if it is used as a framebuffer, even if // the shader doesn't actually write to it. color_surface->MarkAsModified(true, res_cache); + // Workaround for an issue in nvidia drivers + // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ + state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion; } glFramebufferTexture2D( @@ -446,6 +459,11 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep // Assume that a surface will be written to if it is used as a framebuffer, even // if the shader doesn't actually write to it. 
color_surface->MarkAsModified(true, res_cache); + // Enable sRGB only for supported formats + // Workaround for an issue in nvidia drivers + // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ + state.framebuffer_srgb.enabled |= + color_surface->GetSurfaceParams().srgb_conversion; } buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); @@ -487,10 +505,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); } - - SyncViewport(); - - state.Apply(); + SyncViewport(current_state); } void RasterizerOpenGL::Clear() { @@ -503,22 +518,23 @@ void RasterizerOpenGL::Clear() { bool use_stencil{}; OpenGLState clear_state; - clear_state.draw.draw_framebuffer = framebuffer.handle; - clear_state.color_mask.red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE; - clear_state.color_mask.green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE; - clear_state.color_mask.blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE; - clear_state.color_mask.alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE; - if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || regs.clear_buffers.A) { use_color = true; } + if (use_color) { + clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE; + clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE; + clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE; + clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE; + } if (regs.clear_buffers.Z) { ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!"); use_depth = true; // Always enable the depth write when clearing the depth buffer. 
The depth write mask is - // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true. + // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to + // true. clear_state.depth.test_enabled = true; clear_state.depth.test_func = GL_ALWAYS; } @@ -535,9 +551,8 @@ void RasterizerOpenGL::Clear() { ScopeAcquireGLContext acquire_context{emu_window}; - ConfigureFramebuffers(use_color, use_depth || use_stencil, false, + ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); - clear_state.Apply(); if (use_color) { @@ -563,13 +578,14 @@ void RasterizerOpenGL::DrawArrays() { ScopeAcquireGLContext acquire_context{emu_window}; - ConfigureFramebuffers(); - + ConfigureFramebuffers(state); + SyncColorMask(); SyncDepthTestState(); SyncStencilTestState(); SyncBlendState(); SyncLogicOpState(); SyncCullMode(); + SyncPrimitiveRestart(); SyncScissorTest(); // Alpha Testing is synced on shaders. SyncTransformFeedback(); @@ -583,7 +599,7 @@ void RasterizerOpenGL::DrawArrays() { const bool is_indexed = accelerate_draw == AccelDraw::Indexed; state.draw.vertex_buffer = buffer_cache.GetHandle(); - state.Apply(); + state.ApplyVertexBufferState(); std::size_t buffer_size = CalculateVertexArraysSize(); @@ -610,7 +626,8 @@ void RasterizerOpenGL::DrawArrays() { buffer_cache.Map(buffer_size); - SetupVertexArrays(); + SetupVertexFormat(); + SetupVertexBuffer(); DrawParameters params = SetupDraw(); SetupShaders(params.primitive_mode); @@ -690,7 +707,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, // Verify that the cached surface is the same size and format as the requested framebuffer const auto& params{surface->GetSurfaceParams()}; - const auto& pixel_format{SurfaceParams::PixelFormatFromGPUPixelFormat(config.pixel_format)}; + const auto& pixel_format{ + VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; 
ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); @@ -713,16 +731,20 @@ void RasterizerOpenGL::SamplerInfo::Create() { glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); } -void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) { +void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::FullTextureInfo& info) { const GLuint s = sampler.handle; - + const Tegra::Texture::TSCEntry& config = info.tsc; if (mag_filter != config.mag_filter) { mag_filter = config.mag_filter; - glSamplerParameteri(s, GL_TEXTURE_MAG_FILTER, MaxwellToGL::TextureFilterMode(mag_filter)); + glSamplerParameteri( + s, GL_TEXTURE_MAG_FILTER, + MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None)); } - if (min_filter != config.min_filter) { + if (min_filter != config.min_filter || mip_filter != config.mip_filter) { min_filter = config.min_filter; - glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, MaxwellToGL::TextureFilterMode(min_filter)); + mip_filter = config.mip_filter; + glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, + MaxwellToGL::TextureFilterMode(min_filter, mip_filter)); } if (wrap_u != config.wrap_u) { @@ -762,6 +784,22 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, border_color.data()); } } + if (info.tic.use_header_opt_control == 0) { + if (GLAD_GL_ARB_texture_filter_anisotropic) { + glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY, + static_cast<float>(1 << info.tic.max_anisotropy.Value())); + } else if (GLAD_GL_EXT_texture_filter_anisotropic) { + glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY_EXT, + static_cast<float>(1 << info.tic.max_anisotropy.Value())); + } + glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, + 
static_cast<float>(info.tic.res_min_mip_level.Value())); + glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, + static_cast<float>(info.tic.res_max_mip_level.Value() == 0 + ? 16 + : info.tic.res_max_mip_level.Value())); + glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, info.tic.mip_lod_bias.Value() / 256.f); + } } u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader, @@ -859,7 +897,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, continue; } - texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); + texture_samplers[current_bindpoint].SyncWithConfig(texture); Surface surface = res_cache.GetTextureSurface(texture, entry); if (surface != nullptr) { state.texture_units[current_bindpoint].texture = surface->Texture().handle; @@ -881,14 +919,18 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, return current_unit + static_cast<u32>(entries.size()); } -void RasterizerOpenGL::SyncViewport() { +void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; - - state.viewport.x = viewport_rect.left; - state.viewport.y = viewport_rect.bottom; - state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth()); - state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight()); + for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { + const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; + auto& viewport = current_state.viewports[i]; + viewport.x = viewport_rect.left; + viewport.y = viewport_rect.bottom; + viewport.width = static_cast<GLfloat>(viewport_rect.GetWidth()); + viewport.height = static_cast<GLfloat>(viewport_rect.GetHeight()); + viewport.depth_range_far = regs.viewport[i].depth_range_far; + viewport.depth_range_near = 
regs.viewport[i].depth_range_near; + } } void RasterizerOpenGL::SyncClipEnabled() { @@ -923,12 +965,11 @@ void RasterizerOpenGL::SyncCullMode() { } } -void RasterizerOpenGL::SyncDepthScale() { - UNREACHABLE(); -} +void RasterizerOpenGL::SyncPrimitiveRestart() { + const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; -void RasterizerOpenGL::SyncDepthOffset() { - UNREACHABLE(); + state.primitive_restart.enabled = regs.primitive_restart.enabled; + state.primitive_restart.index = regs.primitive_restart.index; } void RasterizerOpenGL::SyncDepthTestState() { @@ -951,9 +992,6 @@ void RasterizerOpenGL::SyncStencilTestState() { return; } - // TODO(bunnei): Verify behavior when this is not set - ASSERT(regs.stencil_two_side_enable); - state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func); state.stencil.front.test_ref = regs.stencil_front_func_ref; state.stencil.front.test_mask = regs.stencil_front_func_mask; @@ -961,36 +999,79 @@ void RasterizerOpenGL::SyncStencilTestState() { state.stencil.front.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_zfail); state.stencil.front.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_front_op_zpass); state.stencil.front.write_mask = regs.stencil_front_mask; + if (regs.stencil_two_side_enable) { + state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func); + state.stencil.back.test_ref = regs.stencil_back_func_ref; + state.stencil.back.test_mask = regs.stencil_back_func_mask; + state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail); + state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail); + state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass); + state.stencil.back.write_mask = regs.stencil_back_mask; + } else { + state.stencil.back.test_func = GL_ALWAYS; + state.stencil.back.test_ref = 0; + state.stencil.back.test_mask = 0xFFFFFFFF; 
+ state.stencil.back.write_mask = 0xFFFFFFFF; + state.stencil.back.action_stencil_fail = GL_KEEP; + state.stencil.back.action_depth_fail = GL_KEEP; + state.stencil.back.action_depth_pass = GL_KEEP; + } +} - state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func); - state.stencil.back.test_ref = regs.stencil_back_func_ref; - state.stencil.back.test_mask = regs.stencil_back_func_mask; - state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail); - state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail); - state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass); - state.stencil.back.write_mask = regs.stencil_back_mask; +void RasterizerOpenGL::SyncColorMask() { + const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { + const auto& source = regs.color_mask[regs.color_mask_common ? 0 : i]; + auto& dest = state.color_mask[i]; + dest.red_enabled = (source.R == 0) ? GL_FALSE : GL_TRUE; + dest.green_enabled = (source.G == 0) ? GL_FALSE : GL_TRUE; + dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; + dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; + } } void RasterizerOpenGL::SyncBlendState() { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - // TODO(Subv): Support more than just render target 0. 
- state.blend.enabled = regs.blend.enable[0] != 0; - - if (!state.blend.enabled) + state.blend_color.red = regs.blend_color.r; + state.blend_color.green = regs.blend_color.g; + state.blend_color.blue = regs.blend_color.b; + state.blend_color.alpha = regs.blend_color.a; + + state.independant_blend.enabled = regs.independent_blend_enable; + if (!state.independant_blend.enabled) { + auto& blend = state.blend[0]; + blend.enabled = regs.blend.enable[0] != 0; + blend.separate_alpha = regs.blend.separate_alpha; + blend.rgb_equation = MaxwellToGL::BlendEquation(regs.blend.equation_rgb); + blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb); + blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb); + if (blend.separate_alpha) { + blend.a_equation = MaxwellToGL::BlendEquation(regs.blend.equation_a); + blend.src_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_a); + blend.dst_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_a); + } + for (size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { + state.blend[i].enabled = false; + } return; + } - ASSERT_MSG(regs.logic_op.enable == 0, - "Blending and logic op can't be enabled at the same time."); - - ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented"); - ASSERT_MSG(!regs.independent_blend[0].separate_alpha, "Unimplemented"); - state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_rgb); - state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_rgb); - state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_rgb); - state.blend.a_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_a); - state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_a); - state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_a); + for (size_t i = 0; i < 
Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { + auto& blend = state.blend[i]; + blend.enabled = regs.blend.enable[i] != 0; + if (!blend.enabled) + continue; + blend.separate_alpha = regs.independent_blend[i].separate_alpha; + blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[i].equation_rgb); + blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_source_rgb); + blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_dest_rgb); + if (blend.separate_alpha) { + blend.a_equation = MaxwellToGL::BlendEquation(regs.independent_blend[i].equation_a); + blend.src_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_source_a); + blend.dst_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_dest_a); + } + } } void RasterizerOpenGL::SyncLogicOpState() { @@ -1009,19 +1090,19 @@ void RasterizerOpenGL::SyncLogicOpState() { } void RasterizerOpenGL::SyncScissorTest() { + // TODO: what is the correct behavior here, a single scissor for all targets + // or scissor disabled for the rest of the targets? const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - state.scissor.enabled = (regs.scissor_test.enable != 0); - // TODO(Blinkhawk): Figure if the hardware supports scissor testing per viewport and how it's - // implemented. 
- if (regs.scissor_test.enable != 0) { - const u32 width = regs.scissor_test.max_x - regs.scissor_test.min_x; - const u32 height = regs.scissor_test.max_y - regs.scissor_test.min_y; - state.scissor.x = regs.scissor_test.min_x; - state.scissor.y = regs.scissor_test.min_y; - state.scissor.width = width; - state.scissor.height = height; + if (regs.scissor_test.enable == 0) { + return; } + const u32 width = regs.scissor_test.max_x - regs.scissor_test.min_x; + const u32 height = regs.scissor_test.max_y - regs.scissor_test.min_y; + state.scissor.x = regs.scissor_test.min_x; + state.scissor.y = regs.scissor_test.min_y; + state.scissor.width = width; + state.scissor.height = height; } void RasterizerOpenGL::SyncTransformFeedback() { @@ -1046,9 +1127,8 @@ void RasterizerOpenGL::CheckAlphaTests() { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) { - LOG_CRITICAL( - Render_OpenGL, - "Alpha Testing is enabled with Multiple Render Targets, this behavior is undefined."); + LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, " + "this behavior is undefined."); UNREACHABLE(); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 0e90a31f5..8ef0f6c12 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -8,12 +8,12 @@ #include <cstddef> #include <map> #include <memory> +#include <optional> #include <tuple> #include <utility> #include <vector> #include <boost/icl/interval_map.hpp> -#include <boost/optional.hpp> #include <boost/range/iterator_range.hpp> #include <glad/glad.h> @@ -88,11 +88,12 @@ private: /// SamplerInfo struct. void Create(); /// Syncs the sampler object with the config, updating any necessary state. 
- void SyncWithConfig(const Tegra::Texture::TSCEntry& config); + void SyncWithConfig(const Tegra::Texture::FullTextureInfo& info); private: Tegra::Texture::TextureFilter mag_filter; Tegra::Texture::TextureFilter min_filter; + Tegra::Texture::TextureMipmapFilter mip_filter; Tegra::Texture::WrapMode wrap_u; Tegra::Texture::WrapMode wrap_v; Tegra::Texture::WrapMode wrap_p; @@ -108,9 +109,9 @@ private: * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. * @param single_color_target Specifies if a single color buffer target should be used. */ - void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true, - bool preserve_contents = true, - boost::optional<std::size_t> single_color_target = {}); + void ConfigureFramebuffers(OpenGLState& current_state, bool use_color_fb = true, + bool using_depth_fb = true, bool preserve_contents = true, + std::optional<std::size_t> single_color_target = {}); /* * Configures the current constbuffers to use for the draw command. 
@@ -132,8 +133,8 @@ private: u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, GLenum primitive_mode, u32 current_unit); - /// Syncs the viewport to match the guest state - void SyncViewport(); + /// Syncs the viewport and depth range to match the guest state + void SyncViewport(OpenGLState& current_state); /// Syncs the clip enabled status to match the guest state void SyncClipEnabled(); @@ -144,11 +145,8 @@ private: /// Syncs the cull mode to match the guest state void SyncCullMode(); - /// Syncs the depth scale to match the guest state - void SyncDepthScale(); - - /// Syncs the depth offset to match the guest state - void SyncDepthOffset(); + /// Syncs the primitive restart to match the guest state + void SyncPrimitiveRestart(); /// Syncs the depth test state to match the guest state void SyncDepthTestState(); @@ -171,6 +169,9 @@ private: /// Syncs the point state to match the guest state void SyncPointState(); + /// Syncs Color Mask + void SyncColorMask(); + /// Check asserts for alpha testing.
void CheckAlphaTests(); @@ -206,7 +207,8 @@ private: std::size_t CalculateIndexBufferSize() const; - void SetupVertexArrays(); + void SetupVertexFormat(); + void SetupVertexBuffer(); DrawParameters SetupDraw(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 9c8925383..9ca82c06c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -15,16 +15,24 @@ #include "core/memory.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" +#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/utils.h" +#include "video_core/surface.h" #include "video_core/textures/astc.h" #include "video_core/textures/decoders.h" #include "video_core/utils.h" namespace OpenGL { -using SurfaceType = SurfaceParams::SurfaceType; -using PixelFormat = SurfaceParams::PixelFormat; -using ComponentType = SurfaceParams::ComponentType; +using VideoCore::Surface::ComponentTypeFromDepthFormat; +using VideoCore::Surface::ComponentTypeFromRenderTarget; +using VideoCore::Surface::ComponentTypeFromTexture; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +using VideoCore::Surface::PixelFormatFromTextureFormat; +using VideoCore::Surface::SurfaceTargetFromTextureType; struct FormatTuple { GLint internal_format; @@ -34,34 +42,6 @@ struct FormatTuple { bool compressed; }; -static bool IsPixelFormatASTC(PixelFormat format) { - switch (format) { - case PixelFormat::ASTC_2D_4X4: - case PixelFormat::ASTC_2D_5X4: - case PixelFormat::ASTC_2D_8X8: - case PixelFormat::ASTC_2D_8X5: - return true; - default: - return false; - } -} - -static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { - switch (format) { - case 
PixelFormat::ASTC_2D_4X4: - return {4, 4}; - case PixelFormat::ASTC_2D_5X4: - return {5, 4}; - case PixelFormat::ASTC_2D_8X8: - return {8, 8}; - case PixelFormat::ASTC_2D_8X5: - return {8, 5}; - default: - LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format)); - UNREACHABLE(); - } -} - void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)}; @@ -78,6 +58,36 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { } } +std::size_t SurfaceParams::InnerMipmapMemorySize(u32 mip_level, bool force_gl, bool layer_only, + bool uncompressed) const { + const u32 tile_x{GetDefaultBlockWidth(pixel_format)}; + const u32 tile_y{GetDefaultBlockHeight(pixel_format)}; + const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)}; + u32 m_depth = (layer_only ? 1U : depth); + u32 m_width = MipWidth(mip_level); + u32 m_height = MipHeight(mip_level); + m_width = uncompressed ? m_width : std::max(1U, (m_width + tile_x - 1) / tile_x); + m_height = uncompressed ? m_height : std::max(1U, (m_height + tile_y - 1) / tile_y); + m_depth = std::max(1U, m_depth >> mip_level); + u32 m_block_height = MipBlockHeight(mip_level); + u32 m_block_depth = MipBlockDepth(mip_level); + return Tegra::Texture::CalculateSize(force_gl ? 
false : is_tiled, bytes_per_pixel, m_width, + m_height, m_depth, m_block_height, m_block_depth); +} + +std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, + bool uncompressed) const { + std::size_t block_size_bytes = Tegra::Texture::GetGOBSize() * block_height * block_depth; + std::size_t size = 0; + for (u32 i = 0; i < max_mip_level; i++) { + size += InnerMipmapMemorySize(i, force_gl, layer_only, uncompressed); + } + if (!force_gl && is_tiled) { + size = Common::AlignUp(size, block_size_bytes); + } + return size; +} + /*static*/ SurfaceParams SurfaceParams::CreateForTexture( const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) { SurfaceParams params{}; @@ -85,8 +95,9 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, - params.pixel_format = - PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value()); + params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); + params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), + params.srgb_conversion); params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); @@ -117,6 +128,13 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { params.target = SurfaceTarget::Texture2D; } break; + case SurfaceTarget::TextureCubeArray: + params.depth = config.tic.Depth() * 6; + if (!entry.IsArray()) { + ASSERT(params.depth == 6); + params.target = SurfaceTarget::TextureCubemap; + } + break; default: LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target)); UNREACHABLE(); @@ -124,6 +142,7 @@ void 
SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { break; } + params.is_layered = SurfaceTargetIsLayered(params.target); params.max_mip_level = config.tic.max_mip_level + 1; params.rt = {}; @@ -142,6 +161,8 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { params.block_height = 1 << config.memory_layout.block_height; params.block_depth = 1 << config.memory_layout.block_depth; params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); + params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || + config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); params.width = config.width; @@ -149,7 +170,8 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; params.depth = 1; - params.max_mip_level = 0; + params.max_mip_level = 1; + params.is_layered = false; // Render target specific parameters, not used for caching params.rt.index = static_cast<u32>(index); @@ -176,12 +198,14 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { params.pixel_format = PixelFormatFromDepthFormat(format); params.component_type = ComponentTypeFromDepthFormat(format); params.type = GetFormatType(params.pixel_format); + params.srgb_conversion = false; params.width = zeta_width; params.height = zeta_height; params.unaligned_height = zeta_height; params.target = SurfaceTarget::Texture2D; params.depth = 1; - params.max_mip_level = 0; + params.max_mip_level = 1; + params.is_layered = false; params.rt = {}; params.InitCacheParameters(zeta_address); @@ -198,6 +222,8 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, params.block_depth = params.is_tiled ? 
std::min(config.BlockDepth(), 32U) : 0, params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); + params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || + config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); params.width = config.width; @@ -205,7 +231,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; params.depth = 1; - params.max_mip_level = 0; + params.max_mip_level = 1; params.rt = {}; params.InitCacheParameters(config.Address()); @@ -213,7 +239,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { return params; } -static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ +static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI @@ -229,7 +255,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, false}, // R11FG11FB10F {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI - {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXT1 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXT23 @@ -263,14 +289,33 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_RG16I, GL_RG_INTEGER, 
GL_SHORT, ComponentType::SInt, false}, // RG16I {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8 - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U - {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, + false}, // RGBA8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U + {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4 + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 + // Compressed sRGB formats + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXT1_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXT23_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXT45_SRGB + 
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, + ComponentType::UNorm, true}, // BC7U_SRGB + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB // Depth formats {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F @@ -286,20 +331,22 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form ComponentType::Float, false}, // Z32FS8 }}; -static GLenum SurfaceTargetToGL(SurfaceParams::SurfaceTarget target) { +static GLenum SurfaceTargetToGL(SurfaceTarget target) { switch (target) { - case SurfaceParams::SurfaceTarget::Texture1D: + case SurfaceTarget::Texture1D: return GL_TEXTURE_1D; - case SurfaceParams::SurfaceTarget::Texture2D: + case SurfaceTarget::Texture2D: return GL_TEXTURE_2D; - case SurfaceParams::SurfaceTarget::Texture3D: + case SurfaceTarget::Texture3D: return GL_TEXTURE_3D; - case SurfaceParams::SurfaceTarget::Texture1DArray: + case SurfaceTarget::Texture1DArray: return GL_TEXTURE_1D_ARRAY; - case SurfaceParams::SurfaceTarget::Texture2DArray: + case SurfaceTarget::Texture2DArray: return GL_TEXTURE_2D_ARRAY; - case SurfaceParams::SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubemap: return GL_TEXTURE_CUBE_MAP; + case SurfaceTarget::TextureCubeArray: + return 
GL_TEXTURE_CUBE_MAP_ARRAY_ARB; } LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast<u32>(target)); UNREACHABLE(); @@ -314,57 +361,41 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType return format; } -MathUtil::Rectangle<u32> SurfaceParams::GetRect() const { - u32 actual_height{unaligned_height}; +MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { + u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; if (IsPixelFormatASTC(pixel_format)) { // ASTC formats must stop at the ASTC block size boundary actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second); } - return {0, actual_height, width, 0}; -} - -/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN -static bool IsFormatBCn(PixelFormat format) { - switch (format) { - case PixelFormat::DXT1: - case PixelFormat::DXT23: - case PixelFormat::DXT45: - case PixelFormat::DXN1: - case PixelFormat::DXN2SNORM: - case PixelFormat::DXN2UNORM: - case PixelFormat::BC7U: - case PixelFormat::BC6H_UF16: - case PixelFormat::BC6H_SF16: - return true; - } - return false; + return {0, actual_height, MipWidth(mip_level), 0}; } template <bool morton_to_gl, PixelFormat format> void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer, std::size_t gl_buffer_size, VAddr addr) { - constexpr u32 bytes_per_pixel = SurfaceParams::GetBytesPerPixel(format); + constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual // pixel values. - const u32 tile_size{IsFormatBCn(format) ? 
4U : 1U}; + const u32 tile_size_x{GetDefaultBlockWidth(format)}; + const u32 tile_size_y{GetDefaultBlockHeight(format)}; if (morton_to_gl) { - const std::vector<u8> data = Tegra::Texture::UnswizzleTexture( - addr, tile_size, bytes_per_pixel, stride, height, depth, block_height, block_depth); - const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())}; - memcpy(gl_buffer, data.data(), size_to_copy); + Tegra::Texture::UnswizzleTexture(gl_buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, + stride, height, depth, block_height, block_depth); } else { - Tegra::Texture::CopySwizzledData(stride / tile_size, height / tile_size, depth, + Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x, + (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), gl_buffer, false, block_height, block_depth); } } -static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr), - SurfaceParams::MaxPixelFormat> - morton_to_gl_fns = { - // clang-format off +using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr), + VideoCore::Surface::MaxPixelFormat>; + +static constexpr GLConversionArray morton_to_gl_fns = { + // clang-format off MortonCopy<true, PixelFormat::ABGR8U>, MortonCopy<true, PixelFormat::ABGR8S>, MortonCopy<true, PixelFormat::ABGR8UI>, @@ -405,7 +436,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, MortonCopy<true, PixelFormat::RG16I>, MortonCopy<true, PixelFormat::RG16S>, MortonCopy<true, PixelFormat::RGB32F>, - MortonCopy<true, PixelFormat::SRGBA8>, + MortonCopy<true, PixelFormat::RGBA8_SRGB>, MortonCopy<true, PixelFormat::RG8U>, MortonCopy<true, PixelFormat::RG8S>, MortonCopy<true, PixelFormat::RG32UI>, @@ -413,18 +444,29 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, MortonCopy<true, PixelFormat::ASTC_2D_8X8>, MortonCopy<true, PixelFormat::ASTC_2D_8X5>, 
MortonCopy<true, PixelFormat::ASTC_2D_5X4>, + MortonCopy<true, PixelFormat::BGRA8_SRGB>, + MortonCopy<true, PixelFormat::DXT1_SRGB>, + MortonCopy<true, PixelFormat::DXT23_SRGB>, + MortonCopy<true, PixelFormat::DXT45_SRGB>, + MortonCopy<true, PixelFormat::BC7U_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_5X5>, + MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_10X8>, + MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, MortonCopy<true, PixelFormat::Z32F>, MortonCopy<true, PixelFormat::Z16>, MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32FS8>, - // clang-format on + // clang-format on }; -static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr), - SurfaceParams::MaxPixelFormat> - gl_to_morton_fns = { - // clang-format off +static constexpr GLConversionArray gl_to_morton_fns = { + // clang-format off MortonCopy<false, PixelFormat::ABGR8U>, MortonCopy<false, PixelFormat::ABGR8S>, MortonCopy<false, PixelFormat::ABGR8UI>, @@ -466,7 +508,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, MortonCopy<false, PixelFormat::RG16I>, MortonCopy<false, PixelFormat::RG16S>, MortonCopy<false, PixelFormat::RGB32F>, - MortonCopy<false, PixelFormat::SRGBA8>, + MortonCopy<false, PixelFormat::RGBA8_SRGB>, MortonCopy<false, PixelFormat::RG8U>, MortonCopy<false, PixelFormat::RG8S>, MortonCopy<false, PixelFormat::RG32UI>, @@ -474,17 +516,61 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, nullptr, nullptr, nullptr, + MortonCopy<false, PixelFormat::BGRA8_SRGB>, + MortonCopy<false, PixelFormat::DXT1_SRGB>, + MortonCopy<false, PixelFormat::DXT23_SRGB>, + MortonCopy<false, 
PixelFormat::DXT45_SRGB>, + MortonCopy<false, PixelFormat::BC7U_SRGB>, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, MortonCopy<false, PixelFormat::Z32F>, MortonCopy<false, PixelFormat::Z16>, MortonCopy<false, PixelFormat::Z24S8>, MortonCopy<false, PixelFormat::S8Z24>, MortonCopy<false, PixelFormat::Z32FS8>, - // clang-format on + // clang-format on }; +void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params, + std::vector<u8>& gl_buffer, u32 mip_level) { + u32 depth = params.MipDepth(mip_level); + if (params.target == SurfaceTarget::Texture2D) { + // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented. + depth = 1U; + } + if (params.is_layered) { + u64 offset = params.GetMipmapLevelOffset(mip_level); + u64 offset_gl = 0; + const u64 layer_size = params.LayerMemorySize(); + const u64 gl_size = params.LayerSizeGL(mip_level); + for (u32 i = 0; i < params.depth; i++) { + functions[static_cast<std::size_t>(params.pixel_format)]( + params.MipWidth(mip_level), params.MipBlockHeight(mip_level), + params.MipHeight(mip_level), params.MipBlockDepth(mip_level), 1, + gl_buffer.data() + offset_gl, gl_size, params.addr + offset); + offset += layer_size; + offset_gl += gl_size; + } + } else { + const u64 offset = params.GetMipmapLevelOffset(mip_level); + functions[static_cast<std::size_t>(params.pixel_format)]( + params.MipWidth(mip_level), params.MipBlockHeight(mip_level), + params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, gl_buffer.data(), + gl_buffer.size(), params.addr + offset); + } +} + +MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64)); static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { + MICROPROFILE_SCOPE(OpenGL_BlitSurface); const auto& 
src_params{src_surface->GetSurfaceParams()}; const auto& dst_params{dst_surface->GetSurfaceParams()}; @@ -495,19 +581,21 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, OpenGLState state; state.draw.read_framebuffer = read_fb_handle; state.draw.draw_framebuffer = draw_fb_handle; - state.Apply(); + // Set sRGB enabled if the destination surfaces need it + state.framebuffer_srgb.enabled = dst_params.srgb_conversion; + state.ApplyFramebufferState(); u32 buffers{}; if (src_params.type == SurfaceType::ColorTexture) { switch (src_params.target) { - case SurfaceParams::SurfaceTarget::Texture2D: + case SurfaceTarget::Texture2D: glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, GL_TEXTURE_2D, src_surface->Texture().handle, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); break; - case SurfaceParams::SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubemap: glFramebufferTexture2D( GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), @@ -516,12 +604,12 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); break; - case SurfaceParams::SurfaceTarget::Texture2DArray: + case SurfaceTarget::Texture2DArray: glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, src_surface->Texture().handle, 0, 0); glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); break; - case SurfaceParams::SurfaceTarget::Texture3D: + case SurfaceTarget::Texture3D: glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, SurfaceTargetToGL(src_params.target), src_surface->Texture().handle, 0, 0); @@ -537,13 +625,13 @@ static bool BlitSurface(const Surface& src_surface, 
const Surface& dst_surface, } switch (dst_params.target) { - case SurfaceParams::SurfaceTarget::Texture2D: + case SurfaceTarget::Texture2D: glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, GL_TEXTURE_2D, dst_surface->Texture().handle, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); break; - case SurfaceParams::SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubemap: glFramebufferTexture2D( GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), @@ -552,13 +640,13 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); break; - case SurfaceParams::SurfaceTarget::Texture2DArray: + case SurfaceTarget::Texture2DArray: glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, dst_surface->Texture().handle, 0, 0); glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); break; - case SurfaceParams::SurfaceTarget::Texture3D: + case SurfaceTarget::Texture3D: glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, SurfaceTargetToGL(dst_params.target), dst_surface->Texture().handle, 0, 0); @@ -622,18 +710,20 @@ static void FastCopySurface(const Surface& src_surface, const Surface& dst_surfa 0, 0, width, height, 1); } +MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64)); static void CopySurface(const Surface& src_surface, const Surface& dst_surface, - GLuint copy_pbo_handle, GLenum src_attachment = 0, - GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { + const GLuint copy_pbo_handle, const GLenum src_attachment = 0, + const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0) { + MICROPROFILE_SCOPE(OpenGL_CopySurface); 
ASSERT_MSG(dst_attachment == 0, "Unimplemented"); const auto& src_params{src_surface->GetSurfaceParams()}; const auto& dst_params{dst_surface->GetSurfaceParams()}; - auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type); - auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type); + const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type); + const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type); - std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes); + const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes); glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); @@ -657,13 +747,10 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface, LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during " "reinterpretation but the texture is tiled."); } - std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; - std::vector<u8> data(remaining_size); - std::memcpy(data.data(), Memory::GetPointer(dst_params.addr + src_params.size_in_bytes), - data.size()); + const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size, - data.data()); + Memory::GetPointer(dst_params.addr + src_params.size_in_bytes)); } glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); @@ -679,21 +766,22 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface, UNREACHABLE(); } else { switch (dst_params.target) { - case SurfaceParams::SurfaceTarget::Texture1D: + case SurfaceTarget::Texture1D: glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format, dest_format.type, nullptr); break; - case 
SurfaceParams::SurfaceTarget::Texture2D: + case SurfaceTarget::Texture2D: glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height, dest_format.format, dest_format.type, nullptr); break; - case SurfaceParams::SurfaceTarget::Texture3D: - case SurfaceParams::SurfaceTarget::Texture2DArray: + case SurfaceTarget::Texture3D: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubeArray: glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height, static_cast<GLsizei>(dst_params.depth), dest_format.format, dest_format.type, nullptr); break; - case SurfaceParams::SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubemap: glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, static_cast<GLint>(cubemap_face), width, height, 1, dest_format.format, dest_format.type, nullptr); @@ -730,35 +818,43 @@ CachedSurface::CachedSurface(const SurfaceParams& params) if (!format_tuple.compressed) { // Only pre-create the texture for non-compressed textures. 
switch (params.target) { - case SurfaceParams::SurfaceTarget::Texture1D: - glTexStorage1D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format, - rect.GetWidth()); + case SurfaceTarget::Texture1D: + glTexStorage1D(SurfaceTargetToGL(params.target), params.max_mip_level, + format_tuple.internal_format, rect.GetWidth()); break; - case SurfaceParams::SurfaceTarget::Texture2D: - case SurfaceParams::SurfaceTarget::TextureCubemap: - glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format, - rect.GetWidth(), rect.GetHeight()); + case SurfaceTarget::Texture2D: + case SurfaceTarget::TextureCubemap: + glTexStorage2D(SurfaceTargetToGL(params.target), params.max_mip_level, + format_tuple.internal_format, rect.GetWidth(), rect.GetHeight()); break; - case SurfaceParams::SurfaceTarget::Texture3D: - case SurfaceParams::SurfaceTarget::Texture2DArray: - glTexStorage3D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format, - rect.GetWidth(), rect.GetHeight(), params.depth); + case SurfaceTarget::Texture3D: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubeArray: + glTexStorage3D(SurfaceTargetToGL(params.target), params.max_mip_level, + format_tuple.internal_format, rect.GetWidth(), rect.GetHeight(), + params.depth); break; default: LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", static_cast<u32>(params.target)); UNREACHABLE(); - glTexStorage2D(GL_TEXTURE_2D, 1, format_tuple.internal_format, rect.GetWidth(), - rect.GetHeight()); + glTexStorage2D(GL_TEXTURE_2D, params.max_mip_level, format_tuple.internal_format, + rect.GetWidth(), rect.GetHeight()); } } glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + 
glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAX_LEVEL, + params.max_mip_level - 1); + if (params.max_mip_level == 1) { + glTexParameterf(SurfaceTargetToGL(params.target), GL_TEXTURE_LOD_BIAS, 1000.0); + } - VideoCore::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, - SurfaceParams::SurfaceTargetName(params.target)); + LabelGLObject(GL_TEXTURE, texture.handle, params.addr, + SurfaceParams::SurfaceTargetName(params.target)); // Clamp size to mapped GPU memory region // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000 @@ -788,7 +884,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bo S8Z24 s8z24_pixel{}; Z24S8 z24s8_pixel{}; - constexpr auto bpp{SurfaceParams::GetBytesPerPixel(PixelFormat::S8Z24)}; + constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)}; for (std::size_t y = 0; y < height; ++y) { for (std::size_t x = 0; x < width; ++x) { const std::size_t offset{bpp * (y * width + x)}; @@ -808,7 +904,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bo } static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { - constexpr auto bpp{SurfaceParams::GetBytesPerPixel(PixelFormat::G8R8U)}; + constexpr auto bpp{GetBytesPerPixel(PixelFormat::G8R8U)}; for (std::size_t y = 0; y < height; ++y) { for (std::size_t x = 0; x < width; ++x) { const std::size_t offset{bpp * (y * width + x)}; @@ -825,17 +921,26 @@ static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { * typical desktop GPUs. 
*/ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, - u32 width, u32 height) { + u32 width, u32 height, u32 depth) { switch (pixel_format) { case PixelFormat::ASTC_2D_4X4: case PixelFormat::ASTC_2D_8X8: case PixelFormat::ASTC_2D_8X5: - case PixelFormat::ASTC_2D_5X4: { + case PixelFormat::ASTC_2D_5X4: + case PixelFormat::ASTC_2D_5X5: + case PixelFormat::ASTC_2D_4X4_SRGB: + case PixelFormat::ASTC_2D_8X8_SRGB: + case PixelFormat::ASTC_2D_8X5_SRGB: + case PixelFormat::ASTC_2D_5X4_SRGB: + case PixelFormat::ASTC_2D_5X5_SRGB: + case PixelFormat::ASTC_2D_10X8: + case PixelFormat::ASTC_2D_10X8_SRGB: { // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. u32 block_width{}; u32 block_height{}; std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); - data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height); + data = + Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); break; } case PixelFormat::S8Z24: @@ -862,7 +967,13 @@ static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelForm case PixelFormat::G8R8U: case PixelFormat::G8R8S: case PixelFormat::ASTC_2D_4X4: - case PixelFormat::ASTC_2D_8X8: { + case PixelFormat::ASTC_2D_8X8: + case PixelFormat::ASTC_2D_4X4_SRGB: + case PixelFormat::ASTC_2D_8X8_SRGB: + case PixelFormat::ASTC_2D_5X5: + case PixelFormat::ASTC_2D_5X5_SRGB: + case PixelFormat::ASTC_2D_10X8: + case PixelFormat::ASTC_2D_10X8_SRGB: { LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", static_cast<u32>(pixel_format)); UNREACHABLE(); @@ -875,34 +986,25 @@ static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelForm } } -MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); +MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); void CachedSurface::LoadGLBuffer() { 
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); - - gl_buffer.resize(params.size_in_bytes_gl); + gl_buffer.resize(params.max_mip_level); + for (u32 i = 0; i < params.max_mip_level; i++) + gl_buffer[i].resize(params.GetMipmapSizeGL(i)); if (params.is_tiled) { - u32 depth = params.depth; - u32 block_depth = params.block_depth; - ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", params.block_width, static_cast<u32>(params.target)); - - if (params.target == SurfaceParams::SurfaceTarget::Texture2D) { - // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented. - depth = 1U; - block_depth = 1U; - } - - morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)]( - params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(), - gl_buffer.size(), params.addr); + for (u32 i = 0; i < params.max_mip_level; i++) + SwizzleFunc(morton_to_gl_fns, params, gl_buffer[i], i); } else { const auto texture_src_data{Memory::GetPointer(params.addr)}; const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; - gl_buffer.assign(texture_src_data, texture_src_data_end); + gl_buffer[0].assign(texture_src_data, texture_src_data_end); } - - ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height); + for (u32 i = 0; i < params.max_mip_level; i++) + ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i), + params.MipHeight(i), params.MipDepth(i)); } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); @@ -912,57 +1014,44 @@ void CachedSurface::FlushGLBuffer() { ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented"); // OpenGL temporary buffer needs to be big enough to store raw texture size - gl_buffer.resize(GetSizeInBytes()); + gl_buffer.resize(1); + gl_buffer[0].resize(GetSizeInBytes()); const FormatTuple& tuple = GetFormatTuple(params.pixel_format, 
params.component_type); // Ensure no bad interactions with GL_UNPACK_ALIGNMENT - ASSERT(params.width * SurfaceParams::GetBytesPerPixel(params.pixel_format) % 4 == 0); + ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width)); ASSERT(!tuple.compressed); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, gl_buffer.size(), - gl_buffer.data()); + glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, + static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); glPixelStorei(GL_PACK_ROW_LENGTH, 0); - ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width, + ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, params.height); ASSERT(params.type != SurfaceType::Fill); const u8* const texture_src_data = Memory::GetPointer(params.addr); ASSERT(texture_src_data); if (params.is_tiled) { - u32 depth = params.depth; - u32 block_depth = params.block_depth; - ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", params.block_width, static_cast<u32>(params.target)); - if (params.target == SurfaceParams::SurfaceTarget::Texture2D) { - // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented. 
- depth = 1U; - } - gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( - params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(), - gl_buffer.size(), GetAddr()); + SwizzleFunc(gl_to_morton_fns, params, gl_buffer[0], 0); } else { - std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer.data(), GetSizeInBytes()); + std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes()); } } -MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); -void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { - if (params.type == SurfaceType::Fill) - return; - - MICROPROFILE_SCOPE(OpenGL_TextureUL); - - const auto& rect{params.GetRect()}; +void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, + GLuint draw_fb_handle) { + const auto& rect{params.GetRect(mip_map)}; // Load data from memory to the surface const GLint x0 = static_cast<GLint>(rect.left); const GLint y0 = static_cast<GLint>(rect.bottom); std::size_t buffer_offset = - static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width + + static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) + static_cast<std::size_t>(x0)) * - SurfaceParams::GetBytesPerPixel(params.pixel_format); + GetBytesPerPixel(params.pixel_format); const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); const GLuint target_tex = texture.handle; @@ -978,89 +1067,120 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle cur_state.Apply(); // Ensure no bad interactions with GL_UNPACK_ALIGNMENT - ASSERT(params.width * SurfaceParams::GetBytesPerPixel(params.pixel_format) % 4 == 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.width)); + ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map))); + GLsizei 
image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false)); glActiveTexture(GL_TEXTURE0); if (tuple.compressed) { switch (params.target) { - case SurfaceParams::SurfaceTarget::Texture2D: - glCompressedTexImage2D( - SurfaceTargetToGL(params.target), 0, tuple.internal_format, - static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), 0, - static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]); + case SurfaceTarget::Texture2D: + glCompressedTexImage2D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format, + static_cast<GLsizei>(params.MipWidth(mip_map)), + static_cast<GLsizei>(params.MipHeight(mip_map)), 0, image_size, + &gl_buffer[mip_map][buffer_offset]); + break; + case SurfaceTarget::Texture3D: + glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format, + static_cast<GLsizei>(params.MipWidth(mip_map)), + static_cast<GLsizei>(params.MipHeight(mip_map)), + static_cast<GLsizei>(params.MipDepth(mip_map)), 0, image_size, + &gl_buffer[mip_map][buffer_offset]); break; - case SurfaceParams::SurfaceTarget::Texture3D: - case SurfaceParams::SurfaceTarget::Texture2DArray: - glCompressedTexImage3D( - SurfaceTargetToGL(params.target), 0, tuple.internal_format, - static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), - static_cast<GLsizei>(params.depth), 0, - static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]); + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubeArray: + glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format, + static_cast<GLsizei>(params.MipWidth(mip_map)), + static_cast<GLsizei>(params.MipHeight(mip_map)), + static_cast<GLsizei>(params.depth), 0, image_size, + &gl_buffer[mip_map][buffer_offset]); break; - case SurfaceParams::SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubemap: { + GLsizei layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map)); for 
(std::size_t face = 0; face < params.depth; ++face) { glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), - 0, tuple.internal_format, static_cast<GLsizei>(params.width), - static_cast<GLsizei>(params.height), 0, - static_cast<GLsizei>(params.SizeInBytesCubeFaceGL()), - &gl_buffer[buffer_offset]); - buffer_offset += params.SizeInBytesCubeFace(); + mip_map, tuple.internal_format, + static_cast<GLsizei>(params.MipWidth(mip_map)), + static_cast<GLsizei>(params.MipHeight(mip_map)), 0, + layer_size, &gl_buffer[mip_map][buffer_offset]); + buffer_offset += layer_size; } break; + } default: LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", static_cast<u32>(params.target)); UNREACHABLE(); - glCompressedTexImage2D( - GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width), - static_cast<GLsizei>(params.height), 0, - static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]); + glCompressedTexImage2D(GL_TEXTURE_2D, mip_map, tuple.internal_format, + static_cast<GLsizei>(params.MipWidth(mip_map)), + static_cast<GLsizei>(params.MipHeight(mip_map)), 0, + static_cast<GLsizei>(params.size_in_bytes_gl), + &gl_buffer[mip_map][buffer_offset]); } } else { switch (params.target) { - case SurfaceParams::SurfaceTarget::Texture1D: - glTexSubImage1D(SurfaceTargetToGL(params.target), 0, x0, + case SurfaceTarget::Texture1D: + glTexSubImage1D(SurfaceTargetToGL(params.target), mip_map, x0, static_cast<GLsizei>(rect.GetWidth()), tuple.format, tuple.type, - &gl_buffer[buffer_offset]); + &gl_buffer[mip_map][buffer_offset]); break; - case SurfaceParams::SurfaceTarget::Texture2D: - glTexSubImage2D(SurfaceTargetToGL(params.target), 0, x0, y0, + case SurfaceTarget::Texture2D: + glTexSubImage2D(SurfaceTargetToGL(params.target), mip_map, x0, y0, static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, - &gl_buffer[buffer_offset]); + &gl_buffer[mip_map][buffer_offset]); break; - 
case SurfaceParams::SurfaceTarget::Texture3D: - case SurfaceParams::SurfaceTarget::Texture2DArray: - glTexSubImage3D(SurfaceTargetToGL(params.target), 0, x0, y0, 0, + case SurfaceTarget::Texture3D: + glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0, + static_cast<GLsizei>(rect.GetWidth()), + static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map), + tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]); + break; + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubeArray: + glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0, static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format, - tuple.type, &gl_buffer[buffer_offset]); + tuple.type, &gl_buffer[mip_map][buffer_offset]); break; - case SurfaceParams::SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubemap: { + std::size_t start = buffer_offset; for (std::size_t face = 0; face < params.depth; ++face) { - glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), 0, x0, - y0, static_cast<GLsizei>(rect.GetWidth()), + glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), mip_map, + x0, y0, static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, - &gl_buffer[buffer_offset]); - buffer_offset += params.SizeInBytesCubeFace(); + &gl_buffer[mip_map][buffer_offset]); + buffer_offset += params.LayerSizeGL(mip_map); } break; + } default: LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", static_cast<u32>(params.target)); UNREACHABLE(); - glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()), + glTexSubImage2D(GL_TEXTURE_2D, mip_map, x0, y0, static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, - &gl_buffer[buffer_offset]); + &gl_buffer[mip_map][buffer_offset]); } } glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); } 
-RasterizerCacheOpenGL::RasterizerCacheOpenGL() { +MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); +void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { + if (params.type == SurfaceType::Fill) + return; + + MICROPROFILE_SCOPE(OpenGL_TextureUL); + + for (u32 i = 0; i < params.max_mip_level; i++) + UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle); +} + +RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer) + : RasterizerCache{rasterizer} { read_framebuffer.Create(); draw_framebuffer.Create(); copy_pbo.Create(); @@ -1179,7 +1299,7 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface) { const auto& src_params{src_surface->GetSurfaceParams()}; const auto& dst_params{dst_surface->GetSurfaceParams()}; - FlushRegion(src_params.addr, dst_params.size_in_bytes); + FlushRegion(src_params.addr, dst_params.MemorySize()); LoadSurface(dst_surface); } @@ -1200,8 +1320,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, // For compatible surfaces, we can just do fast glCopyImageSubData based copy if (old_params.target == new_params.target && old_params.type == new_params.type && old_params.depth == new_params.depth && old_params.depth == 1 && - SurfaceParams::GetFormatBpp(old_params.pixel_format) == - SurfaceParams::GetFormatBpp(new_params.pixel_format)) { + GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format)) { FastCopySurface(old_surface, new_surface); return new_surface; } @@ -1214,51 +1333,19 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, const bool is_blit{old_params.pixel_format == new_params.pixel_format}; switch (new_params.target) { - case SurfaceParams::SurfaceTarget::Texture2D: + case SurfaceTarget::Texture2D: if (is_blit) { BlitSurface(old_surface, new_surface, read_framebuffer.handle, draw_framebuffer.handle); } else { 
CopySurface(old_surface, new_surface, copy_pbo.handle); } break; - case SurfaceParams::SurfaceTarget::Texture3D: + case SurfaceTarget::TextureCubemap: + case SurfaceTarget::Texture3D: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubeArray: AccurateCopySurface(old_surface, new_surface); break; - case SurfaceParams::SurfaceTarget::TextureCubemap: { - if (old_params.rt.array_mode != 1) { - // TODO(bunnei): This is used by Breath of the Wild, I'm not sure how to implement this - // yet (array rendering used as a cubemap texture). - LOG_CRITICAL(HW_GPU, "Unhandled rendertarget array_mode {}", old_params.rt.array_mode); - UNREACHABLE(); - return new_surface; - } - - // This seems to be used for render-to-cubemap texture - ASSERT_MSG(old_params.target == SurfaceParams::SurfaceTarget::Texture2D, "Unexpected"); - ASSERT_MSG(old_params.pixel_format == new_params.pixel_format, "Unexpected"); - ASSERT_MSG(old_params.rt.base_layer == 0, "Unimplemented"); - - // TODO(bunnei): Verify the below - this stride seems to be in 32-bit words, not pixels. - // Tested with Splatoon 2, Super Mario Odyssey, and Breath of the Wild. 
- const std::size_t byte_stride{old_params.rt.layer_stride * sizeof(u32)}; - - for (std::size_t index = 0; index < new_params.depth; ++index) { - Surface face_surface{TryGetReservedSurface(old_params)}; - ASSERT_MSG(face_surface, "Unexpected"); - - if (is_blit) { - BlitSurface(face_surface, new_surface, read_framebuffer.handle, - draw_framebuffer.handle, face_surface->GetSurfaceParams().rt.index, - new_params.rt.index, index); - } else { - CopySurface(face_surface, new_surface, copy_pbo.handle, - face_surface->GetSurfaceParams().rt.index, new_params.rt.index, index); - } - - old_params.addr += byte_stride; - } - break; - } default: LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", static_cast<u32>(new_params.target)); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 0dd0d90a3..494f6b903 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -7,6 +7,7 @@ #include <array> #include <map> #include <memory> +#include <string> #include <vector> #include "common/alignment.h" @@ -18,6 +19,7 @@ #include "video_core/rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/surface.h" #include "video_core/textures/decoders.h" #include "video_core/textures/texture.h" @@ -27,126 +29,12 @@ class CachedSurface; using Surface = std::shared_ptr<CachedSurface>; using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; -struct SurfaceParams { - enum class PixelFormat { - ABGR8U = 0, - ABGR8S = 1, - ABGR8UI = 2, - B5G6R5U = 3, - A2B10G10R10U = 4, - A1B5G5R5U = 5, - R8U = 6, - R8UI = 7, - RGBA16F = 8, - RGBA16U = 9, - RGBA16UI = 10, - R11FG11FB10F = 11, - RGBA32UI = 12, - DXT1 = 13, - DXT23 = 14, - DXT45 = 15, - DXN1 = 16, // This is also known as BC4 - DXN2UNORM = 17, - DXN2SNORM = 18, - BC7U = 
19, - BC6H_UF16 = 20, - BC6H_SF16 = 21, - ASTC_2D_4X4 = 22, - G8R8U = 23, - G8R8S = 24, - BGRA8 = 25, - RGBA32F = 26, - RG32F = 27, - R32F = 28, - R16F = 29, - R16U = 30, - R16S = 31, - R16UI = 32, - R16I = 33, - RG16 = 34, - RG16F = 35, - RG16UI = 36, - RG16I = 37, - RG16S = 38, - RGB32F = 39, - SRGBA8 = 40, - RG8U = 41, - RG8S = 42, - RG32UI = 43, - R32UI = 44, - ASTC_2D_8X8 = 45, - ASTC_2D_8X5 = 46, - ASTC_2D_5X4 = 47, - - MaxColorFormat, - - // Depth formats - Z32F = 48, - Z16 = 49, - - MaxDepthFormat, - - // DepthStencil formats - Z24S8 = 50, - S8Z24 = 51, - Z32FS8 = 52, - - MaxDepthStencilFormat, - - Max = MaxDepthStencilFormat, - Invalid = 255, - }; - - static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max); - - enum class ComponentType { - Invalid = 0, - SNorm = 1, - UNorm = 2, - SInt = 3, - UInt = 4, - Float = 5, - }; - - enum class SurfaceType { - ColorTexture = 0, - Depth = 1, - DepthStencil = 2, - Fill = 3, - Invalid = 4, - }; - - enum class SurfaceTarget { - Texture1D, - Texture2D, - Texture3D, - Texture1DArray, - Texture2DArray, - TextureCubemap, - }; - - static SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type) { - switch (texture_type) { - case Tegra::Texture::TextureType::Texture1D: - return SurfaceTarget::Texture1D; - case Tegra::Texture::TextureType::Texture2D: - case Tegra::Texture::TextureType::Texture2DNoMipmap: - return SurfaceTarget::Texture2D; - case Tegra::Texture::TextureType::Texture3D: - return SurfaceTarget::Texture3D; - case Tegra::Texture::TextureType::TextureCubemap: - return SurfaceTarget::TextureCubemap; - case Tegra::Texture::TextureType::Texture1DArray: - return SurfaceTarget::Texture1DArray; - case Tegra::Texture::TextureType::Texture2DArray: - return SurfaceTarget::Texture2DArray; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", static_cast<u32>(texture_type)); - UNREACHABLE(); - return SurfaceTarget::Texture2D; - } - } +using SurfaceTarget = 
VideoCore::Surface::SurfaceTarget; +using SurfaceType = VideoCore::Surface::SurfaceType; +using PixelFormat = VideoCore::Surface::PixelFormat; +using ComponentType = VideoCore::Surface::ComponentType; +struct SurfaceParams { static std::string SurfaceTargetName(SurfaceTarget target) { switch (target) { case SurfaceTarget::Texture1D: @@ -161,6 +49,8 @@ struct SurfaceParams { return "Texture2DArray"; case SurfaceTarget::TextureCubemap: return "TextureCubemap"; + case SurfaceTarget::TextureCubeArray: + return "TextureCubeArray"; default: LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); UNREACHABLE(); @@ -168,552 +58,12 @@ struct SurfaceParams { } } - /** - * Gets the compression factor for the specified PixelFormat. This applies to just the - * "compressed width" and "compressed height", not the overall compression factor of a - * compressed image. This is used for maintaining proper surface sizes for compressed - * texture formats. - */ - static constexpr u32 GetCompressionFactor(PixelFormat format) { - if (format == PixelFormat::Invalid) - return 0; - - constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, // DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // G8R8U - 1, // G8R8S - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // SRGBA8 - 1, // RG8U - 1, // RG8S - 1, // RG32UI - 1, // R32UI - 4, // ASTC_2D_8X8 - 4, // ASTC_2D_8X5 - 4, // ASTC_2D_5X4 - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 - }}; - - 
ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size()); - return compression_factor_table[static_cast<std::size_t>(format)]; - } - - static constexpr u32 GetFormatBpp(PixelFormat format) { - if (format == PixelFormat::Invalid) - return 0; - - constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ - 32, // ABGR8U - 32, // ABGR8S - 32, // ABGR8UI - 16, // B5G6R5U - 32, // A2B10G10R10U - 16, // A1B5G5R5U - 8, // R8U - 8, // R8UI - 64, // RGBA16F - 64, // RGBA16U - 64, // RGBA16UI - 32, // R11FG11FB10F - 128, // RGBA32UI - 64, // DXT1 - 128, // DXT23 - 128, // DXT45 - 64, // DXN1 - 128, // DXN2UNORM - 128, // DXN2SNORM - 128, // BC7U - 128, // BC6H_UF16 - 128, // BC6H_SF16 - 32, // ASTC_2D_4X4 - 16, // G8R8U - 16, // G8R8S - 32, // BGRA8 - 128, // RGBA32F - 64, // RG32F - 32, // R32F - 16, // R16F - 16, // R16U - 16, // R16S - 16, // R16UI - 16, // R16I - 32, // RG16 - 32, // RG16F - 32, // RG16UI - 32, // RG16I - 32, // RG16S - 96, // RGB32F - 32, // SRGBA8 - 16, // RG8U - 16, // RG8S - 64, // RG32UI - 32, // R32UI - 16, // ASTC_2D_8X8 - 32, // ASTC_2D_8X5 - 32, // ASTC_2D_5X4 - 32, // Z32F - 16, // Z16 - 32, // Z24S8 - 32, // S8Z24 - 64, // Z32FS8 - }}; - - ASSERT(static_cast<std::size_t>(format) < bpp_table.size()); - return bpp_table[static_cast<std::size_t>(format)]; - } - u32 GetFormatBpp() const { - return GetFormatBpp(pixel_format); - } - - static PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { - switch (format) { - case Tegra::DepthFormat::S8_Z24_UNORM: - return PixelFormat::S8Z24; - case Tegra::DepthFormat::Z24_S8_UNORM: - return PixelFormat::Z24S8; - case Tegra::DepthFormat::Z32_FLOAT: - return PixelFormat::Z32F; - case Tegra::DepthFormat::Z16_UNORM: - return PixelFormat::Z16; - case Tegra::DepthFormat::Z32_S8_X24_FLOAT: - return PixelFormat::Z32FS8; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - } - } - - static PixelFormat 
PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { - switch (format) { - // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the - // gamma. - case Tegra::RenderTargetFormat::RGBA8_SRGB: - case Tegra::RenderTargetFormat::RGBA8_UNORM: - return PixelFormat::ABGR8U; - case Tegra::RenderTargetFormat::RGBA8_SNORM: - return PixelFormat::ABGR8S; - case Tegra::RenderTargetFormat::RGBA8_UINT: - return PixelFormat::ABGR8UI; - case Tegra::RenderTargetFormat::BGRA8_SRGB: - case Tegra::RenderTargetFormat::BGRA8_UNORM: - return PixelFormat::BGRA8; - case Tegra::RenderTargetFormat::RGB10_A2_UNORM: - return PixelFormat::A2B10G10R10U; - case Tegra::RenderTargetFormat::RGBA16_FLOAT: - return PixelFormat::RGBA16F; - case Tegra::RenderTargetFormat::RGBA16_UNORM: - return PixelFormat::RGBA16U; - case Tegra::RenderTargetFormat::RGBA16_UINT: - return PixelFormat::RGBA16UI; - case Tegra::RenderTargetFormat::RGBA32_FLOAT: - return PixelFormat::RGBA32F; - case Tegra::RenderTargetFormat::RG32_FLOAT: - return PixelFormat::RG32F; - case Tegra::RenderTargetFormat::R11G11B10_FLOAT: - return PixelFormat::R11FG11FB10F; - case Tegra::RenderTargetFormat::B5G6R5_UNORM: - return PixelFormat::B5G6R5U; - case Tegra::RenderTargetFormat::BGR5A1_UNORM: - return PixelFormat::A1B5G5R5U; - case Tegra::RenderTargetFormat::RGBA32_UINT: - return PixelFormat::RGBA32UI; - case Tegra::RenderTargetFormat::R8_UNORM: - return PixelFormat::R8U; - case Tegra::RenderTargetFormat::R8_UINT: - return PixelFormat::R8UI; - case Tegra::RenderTargetFormat::RG16_FLOAT: - return PixelFormat::RG16F; - case Tegra::RenderTargetFormat::RG16_UINT: - return PixelFormat::RG16UI; - case Tegra::RenderTargetFormat::RG16_SINT: - return PixelFormat::RG16I; - case Tegra::RenderTargetFormat::RG16_UNORM: - return PixelFormat::RG16; - case Tegra::RenderTargetFormat::RG16_SNORM: - return PixelFormat::RG16S; - case Tegra::RenderTargetFormat::RG8_UNORM: - return PixelFormat::RG8U; - case 
Tegra::RenderTargetFormat::RG8_SNORM: - return PixelFormat::RG8S; - case Tegra::RenderTargetFormat::R16_FLOAT: - return PixelFormat::R16F; - case Tegra::RenderTargetFormat::R16_UNORM: - return PixelFormat::R16U; - case Tegra::RenderTargetFormat::R16_SNORM: - return PixelFormat::R16S; - case Tegra::RenderTargetFormat::R16_UINT: - return PixelFormat::R16UI; - case Tegra::RenderTargetFormat::R16_SINT: - return PixelFormat::R16I; - case Tegra::RenderTargetFormat::R32_FLOAT: - return PixelFormat::R32F; - case Tegra::RenderTargetFormat::R32_UINT: - return PixelFormat::R32UI; - case Tegra::RenderTargetFormat::RG32_UINT: - return PixelFormat::RG32UI; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - } - } - - static PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, - Tegra::Texture::ComponentType component_type) { - // TODO(Subv): Properly implement this - switch (format) { - case Tegra::Texture::TextureFormat::A8R8G8B8: - switch (component_type) { - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::ABGR8U; - case Tegra::Texture::ComponentType::SNORM: - return PixelFormat::ABGR8S; - case Tegra::Texture::ComponentType::UINT: - return PixelFormat::ABGR8UI; - } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", - static_cast<u32>(component_type)); - UNREACHABLE(); - case Tegra::Texture::TextureFormat::B5G6R5: - switch (component_type) { - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::B5G6R5U; - } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", - static_cast<u32>(component_type)); - UNREACHABLE(); - case Tegra::Texture::TextureFormat::A2B10G10R10: - switch (component_type) { - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::A2B10G10R10U; - } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", - static_cast<u32>(component_type)); - UNREACHABLE(); - case Tegra::Texture::TextureFormat::A1B5G5R5: - switch (component_type) { 
- case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::A1B5G5R5U; - } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", - static_cast<u32>(component_type)); - UNREACHABLE(); - case Tegra::Texture::TextureFormat::R8: - switch (component_type) { - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::R8U; - case Tegra::Texture::ComponentType::UINT: - return PixelFormat::R8UI; - } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", - static_cast<u32>(component_type)); - UNREACHABLE(); - case Tegra::Texture::TextureFormat::G8R8: - switch (component_type) { - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::G8R8U; - case Tegra::Texture::ComponentType::SNORM: - return PixelFormat::G8R8S; - } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", - static_cast<u32>(component_type)); - UNREACHABLE(); - case Tegra::Texture::TextureFormat::R16_G16_B16_A16: - switch (component_type) { - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::RGBA16U; - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::RGBA16F; - } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", - static_cast<u32>(component_type)); - UNREACHABLE(); - case Tegra::Texture::TextureFormat::BF10GF11RF11: - switch (component_type) { - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::R11FG11FB10F; - } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", - static_cast<u32>(component_type)); - UNREACHABLE(); - case Tegra::Texture::TextureFormat::R32_G32_B32_A32: - switch (component_type) { - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::RGBA32F; - case Tegra::Texture::ComponentType::UINT: - return PixelFormat::RGBA32UI; - } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", - static_cast<u32>(component_type)); - UNREACHABLE(); - case Tegra::Texture::TextureFormat::R32_G32: - switch (component_type) { - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::RG32F; - case 
Tegra::Texture::ComponentType::UINT: - return PixelFormat::RG32UI; - } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", - static_cast<u32>(component_type)); - UNREACHABLE(); - case Tegra::Texture::TextureFormat::R32_G32_B32: - switch (component_type) { - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::RGB32F; - } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", - static_cast<u32>(component_type)); - UNREACHABLE(); - case Tegra::Texture::TextureFormat::R16: - switch (component_type) { - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::R16F; - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::R16U; - case Tegra::Texture::ComponentType::SNORM: - return PixelFormat::R16S; - case Tegra::Texture::ComponentType::UINT: - return PixelFormat::R16UI; - case Tegra::Texture::ComponentType::SINT: - return PixelFormat::R16I; - } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", - static_cast<u32>(component_type)); - UNREACHABLE(); - case Tegra::Texture::TextureFormat::R32: - switch (component_type) { - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::R32F; - case Tegra::Texture::ComponentType::UINT: - return PixelFormat::R32UI; - } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", - static_cast<u32>(component_type)); - UNREACHABLE(); - case Tegra::Texture::TextureFormat::ZF32: - return PixelFormat::Z32F; - case Tegra::Texture::TextureFormat::Z16: - return PixelFormat::Z16; - case Tegra::Texture::TextureFormat::Z24S8: - return PixelFormat::Z24S8; - case Tegra::Texture::TextureFormat::DXT1: - return PixelFormat::DXT1; - case Tegra::Texture::TextureFormat::DXT23: - return PixelFormat::DXT23; - case Tegra::Texture::TextureFormat::DXT45: - return PixelFormat::DXT45; - case Tegra::Texture::TextureFormat::DXN1: - return PixelFormat::DXN1; - case Tegra::Texture::TextureFormat::DXN2: - switch (component_type) { - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::DXN2UNORM; - 
case Tegra::Texture::ComponentType::SNORM: - return PixelFormat::DXN2SNORM; - } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", - static_cast<u32>(component_type)); - UNREACHABLE(); - case Tegra::Texture::TextureFormat::BC7U: - return PixelFormat::BC7U; - case Tegra::Texture::TextureFormat::BC6H_UF16: - return PixelFormat::BC6H_UF16; - case Tegra::Texture::TextureFormat::BC6H_SF16: - return PixelFormat::BC6H_SF16; - case Tegra::Texture::TextureFormat::ASTC_2D_4X4: - return PixelFormat::ASTC_2D_4X4; - case Tegra::Texture::TextureFormat::ASTC_2D_5X4: - return PixelFormat::ASTC_2D_5X4; - case Tegra::Texture::TextureFormat::ASTC_2D_8X8: - return PixelFormat::ASTC_2D_8X8; - case Tegra::Texture::TextureFormat::ASTC_2D_8X5: - return PixelFormat::ASTC_2D_8X5; - case Tegra::Texture::TextureFormat::R16_G16: - switch (component_type) { - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::RG16F; - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::RG16; - case Tegra::Texture::ComponentType::SNORM: - return PixelFormat::RG16S; - case Tegra::Texture::ComponentType::UINT: - return PixelFormat::RG16UI; - case Tegra::Texture::ComponentType::SINT: - return PixelFormat::RG16I; - } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", - static_cast<u32>(component_type)); - UNREACHABLE(); - default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", - static_cast<u32>(format), static_cast<u32>(component_type)); - UNREACHABLE(); - } - } - - static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) { - // TODO(Subv): Implement more component types - switch (type) { - case Tegra::Texture::ComponentType::UNORM: - return ComponentType::UNorm; - case Tegra::Texture::ComponentType::FLOAT: - return ComponentType::Float; - case Tegra::Texture::ComponentType::SNORM: - return ComponentType::SNorm; - case Tegra::Texture::ComponentType::UINT: - return ComponentType::UInt; - case Tegra::Texture::ComponentType::SINT: 
- return ComponentType::SInt; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type)); - UNREACHABLE(); - } - } - - static ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) { - // TODO(Subv): Implement more render targets - switch (format) { - case Tegra::RenderTargetFormat::RGBA8_UNORM: - case Tegra::RenderTargetFormat::RGBA8_SRGB: - case Tegra::RenderTargetFormat::BGRA8_UNORM: - case Tegra::RenderTargetFormat::BGRA8_SRGB: - case Tegra::RenderTargetFormat::RGB10_A2_UNORM: - case Tegra::RenderTargetFormat::R8_UNORM: - case Tegra::RenderTargetFormat::RG16_UNORM: - case Tegra::RenderTargetFormat::R16_UNORM: - case Tegra::RenderTargetFormat::B5G6R5_UNORM: - case Tegra::RenderTargetFormat::BGR5A1_UNORM: - case Tegra::RenderTargetFormat::RG8_UNORM: - case Tegra::RenderTargetFormat::RGBA16_UNORM: - return ComponentType::UNorm; - case Tegra::RenderTargetFormat::RGBA8_SNORM: - case Tegra::RenderTargetFormat::RG16_SNORM: - case Tegra::RenderTargetFormat::R16_SNORM: - case Tegra::RenderTargetFormat::RG8_SNORM: - return ComponentType::SNorm; - case Tegra::RenderTargetFormat::RGBA16_FLOAT: - case Tegra::RenderTargetFormat::R11G11B10_FLOAT: - case Tegra::RenderTargetFormat::RGBA32_FLOAT: - case Tegra::RenderTargetFormat::RG32_FLOAT: - case Tegra::RenderTargetFormat::RG16_FLOAT: - case Tegra::RenderTargetFormat::R16_FLOAT: - case Tegra::RenderTargetFormat::R32_FLOAT: - return ComponentType::Float; - case Tegra::RenderTargetFormat::RGBA32_UINT: - case Tegra::RenderTargetFormat::RGBA16_UINT: - case Tegra::RenderTargetFormat::RG16_UINT: - case Tegra::RenderTargetFormat::R8_UINT: - case Tegra::RenderTargetFormat::R16_UINT: - case Tegra::RenderTargetFormat::RG32_UINT: - case Tegra::RenderTargetFormat::R32_UINT: - case Tegra::RenderTargetFormat::RGBA8_UINT: - return ComponentType::UInt; - case Tegra::RenderTargetFormat::RG16_SINT: - case Tegra::RenderTargetFormat::R16_SINT: - return ComponentType::SInt; - default: - 
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - } - } - - static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { - switch (format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return PixelFormat::ABGR8U; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - } - } - - static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) { - switch (format) { - case Tegra::DepthFormat::Z16_UNORM: - case Tegra::DepthFormat::S8_Z24_UNORM: - case Tegra::DepthFormat::Z24_S8_UNORM: - return ComponentType::UNorm; - case Tegra::DepthFormat::Z32_FLOAT: - case Tegra::DepthFormat::Z32_S8_X24_FLOAT: - return ComponentType::Float; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - } - } - - static SurfaceType GetFormatType(PixelFormat pixel_format) { - if (static_cast<std::size_t>(pixel_format) < - static_cast<std::size_t>(PixelFormat::MaxColorFormat)) { - return SurfaceType::ColorTexture; - } - - if (static_cast<std::size_t>(pixel_format) < - static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) { - return SurfaceType::Depth; - } - - if (static_cast<std::size_t>(pixel_format) < - static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) { - return SurfaceType::DepthStencil; - } - - // TODO(Subv): Implement the other formats - ASSERT(false); - - return SurfaceType::Invalid; - } - - /// Returns the sizer in bytes of the specified pixel format - static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { - if (pixel_format == SurfaceParams::PixelFormat::Invalid) { - return 0; - } - return GetFormatBpp(pixel_format) / CHAR_BIT; + return VideoCore::Surface::GetFormatBpp(pixel_format); } /// Returns the rectangle corresponding to this surface - MathUtil::Rectangle<u32> GetRect() const; + MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const; /// Returns the 
total size of this surface in bytes, adjusted for compression std::size_t SizeInBytesRaw(bool ignore_tiled = false) const { @@ -742,6 +92,91 @@ struct SurfaceParams { return size_in_bytes_gl / 6; } + /// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps. + std::size_t MemorySize() const { + std::size_t size = InnerMemorySize(false, is_layered); + if (is_layered) + return size * depth; + return size; + } + + /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including + /// mipmaps. + std::size_t LayerMemorySize() const { + return InnerMemorySize(false, true); + } + + /// Returns the size of a layer of this surface in OpenGL. + std::size_t LayerSizeGL(u32 mip_level) const { + return InnerMipmapMemorySize(mip_level, true, is_layered, false); + } + + std::size_t GetMipmapSizeGL(u32 mip_level, bool ignore_compressed = true) const { + std::size_t size = InnerMipmapMemorySize(mip_level, true, is_layered, ignore_compressed); + if (is_layered) + return size * depth; + return size; + } + + std::size_t GetMipmapLevelOffset(u32 mip_level) const { + std::size_t offset = 0; + for (u32 i = 0; i < mip_level; i++) + offset += InnerMipmapMemorySize(i, false, is_layered); + return offset; + } + + std::size_t GetMipmapLevelOffsetGL(u32 mip_level) const { + std::size_t offset = 0; + for (u32 i = 0; i < mip_level; i++) + offset += InnerMipmapMemorySize(i, true, is_layered); + return offset; + } + + u32 MipWidth(u32 mip_level) const { + return std::max(1U, width >> mip_level); + } + + u32 MipHeight(u32 mip_level) const { + return std::max(1U, height >> mip_level); + } + + u32 MipDepth(u32 mip_level) const { + return is_layered ? 
depth : std::max(1U, depth >> mip_level); + } + + // Auto block resizing algorithm from: + // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c + u32 MipBlockHeight(u32 mip_level) const { + if (mip_level == 0) + return block_height; + u32 alt_height = MipHeight(mip_level); + u32 h = GetDefaultBlockHeight(pixel_format); + u32 blocks_in_y = (alt_height + h - 1) / h; + u32 bh = 16; + while (bh > 1 && blocks_in_y <= bh * 4) { + bh >>= 1; + } + return bh; + } + + u32 MipBlockDepth(u32 mip_level) const { + if (mip_level == 0) + return block_depth; + if (is_layered) + return 1; + u32 depth = MipDepth(mip_level); + u32 bd = 32; + while (bd > 1 && depth * 2 <= bd) { + bd >>= 1; + } + if (bd == 32) { + u32 bh = MipBlockHeight(mip_level); + if (bh >= 4) + return 16; + } + return bd; + } + /// Creates SurfaceParams from a texture configuration static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry); @@ -782,7 +217,8 @@ struct SurfaceParams { u32 unaligned_height; SurfaceTarget target; u32 max_mip_level; - + bool is_layered; + bool srgb_conversion; // Parameters used for caching VAddr addr; Tegra::GPUVAddr gpu_addr; @@ -797,6 +233,12 @@ struct SurfaceParams { u32 layer_stride; u32 base_layer; } rt; + +private: + std::size_t InnerMipmapMemorySize(u32 mip_level, bool force_gl = false, bool layer_only = false, + bool uncompressed = false) const; + std::size_t InnerMemorySize(bool force_gl = false, bool layer_only = false, + bool uncompressed = false) const; }; }; // namespace OpenGL @@ -822,6 +264,8 @@ struct hash<SurfaceReserveKey> { namespace OpenGL { +class RasterizerOpenGL; + class CachedSurface final : public RasterizerCacheObject { public: CachedSurface(const SurfaceParams& params); @@ -858,8 +302,10 @@ public: void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); private: + void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint 
draw_fb_handle); + OGLTexture texture; - std::vector<u8> gl_buffer; + std::vector<std::vector<u8>> gl_buffer; SurfaceParams params; GLenum gl_target; std::size_t cached_size_in_bytes; @@ -867,7 +313,7 @@ private: class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { public: - RasterizerCacheOpenGL(); + explicit RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer); /// Get a surface based on the texture configuration Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp new file mode 100644 index 000000000..c17d5ac00 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -0,0 +1,186 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <utility> +#include <glad/glad.h> +#include "common/common_types.h" +#include "common/microprofile.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/gl_state.h" + +MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192)); +MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192)); + +namespace OpenGL { + +void OGLTexture::Create() { + if (handle != 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + glGenTextures(1, &handle); +} + +void OGLTexture::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteTextures(1, &handle); + OpenGLState::GetCurState().UnbindTexture(handle).Apply(); + handle = 0; +} + +void OGLSampler::Create() { + if (handle != 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + glGenSamplers(1, &handle); +} + +void OGLSampler::Release() { + if (handle == 0) + return; + + 
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteSamplers(1, &handle); + OpenGLState::GetCurState().ResetSampler(handle).Apply(); + handle = 0; +} + +void OGLShader::Create(const char* source, GLenum type) { + if (handle != 0) + return; + if (source == nullptr) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + handle = GLShader::LoadShader(source, type); +} + +void OGLShader::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteShader(handle); + handle = 0; +} + +void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader, + const char* frag_shader, bool separable_program) { + OGLShader vert, geo, frag; + if (vert_shader) + vert.Create(vert_shader, GL_VERTEX_SHADER); + if (geo_shader) + geo.Create(geo_shader, GL_GEOMETRY_SHADER); + if (frag_shader) + frag.Create(frag_shader, GL_FRAGMENT_SHADER); + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + Create(separable_program, vert.handle, geo.handle, frag.handle); +} + +void OGLProgram::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteProgram(handle); + OpenGLState::GetCurState().ResetProgram(handle).Apply(); + handle = 0; +} + +void OGLPipeline::Create() { + if (handle != 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + glGenProgramPipelines(1, &handle); +} + +void OGLPipeline::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteProgramPipelines(1, &handle); + OpenGLState::GetCurState().ResetPipeline(handle).Apply(); + handle = 0; +} + +void OGLBuffer::Create() { + if (handle != 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + glGenBuffers(1, &handle); +} + +void OGLBuffer::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteBuffers(1, &handle); + OpenGLState::GetCurState().ResetBuffer(handle).Apply(); + handle = 0; +} + +void OGLSync::Create() { + if (handle 
!= 0) + return; + + // Don't profile here, this one is expected to happen ingame. + handle = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); +} + +void OGLSync::Release() { + if (handle == 0) + return; + + // Don't profile here, this one is expected to happen ingame. + glDeleteSync(handle); + handle = 0; +} + +void OGLVertexArray::Create() { + if (handle != 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + glGenVertexArrays(1, &handle); +} + +void OGLVertexArray::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteVertexArrays(1, &handle); + OpenGLState::GetCurState().ResetVertexArray(handle).Apply(); + handle = 0; +} + +void OGLFramebuffer::Create() { + if (handle != 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + glGenFramebuffers(1, &handle); +} + +void OGLFramebuffer::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteFramebuffers(1, &handle); + OpenGLState::GetCurState().ResetFramebuffer(handle).Apply(); + handle = 0; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 3bc1b83b5..e33f1e973 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -8,7 +8,6 @@ #include <glad/glad.h> #include "common/common_types.h" #include "video_core/renderer_opengl/gl_shader_util.h" -#include "video_core/renderer_opengl/gl_state.h" namespace OpenGL { @@ -29,20 +28,10 @@ public: } /// Creates a new internal OpenGL resource and stores the handle - void Create() { - if (handle != 0) - return; - glGenTextures(1, &handle); - } + void Create(); /// Deletes the internal OpenGL resource - void Release() { - if (handle == 0) - return; - glDeleteTextures(1, &handle); - OpenGLState::GetCurState().UnbindTexture(handle).Apply(); - handle = 0; - } + void Release(); GLuint handle = 0; }; @@ 
-64,20 +53,10 @@ public: } /// Creates a new internal OpenGL resource and stores the handle - void Create() { - if (handle != 0) - return; - glGenSamplers(1, &handle); - } + void Create(); /// Deletes the internal OpenGL resource - void Release() { - if (handle == 0) - return; - glDeleteSamplers(1, &handle); - OpenGLState::GetCurState().ResetSampler(handle).Apply(); - handle = 0; - } + void Release(); GLuint handle = 0; }; @@ -98,20 +77,9 @@ public: return *this; } - void Create(const char* source, GLenum type) { - if (handle != 0) - return; - if (source == nullptr) - return; - handle = GLShader::LoadShader(source, type); - } + void Create(const char* source, GLenum type); - void Release() { - if (handle == 0) - return; - glDeleteShader(handle); - handle = 0; - } + void Release(); GLuint handle = 0; }; @@ -141,25 +109,10 @@ public: /// Creates a new internal OpenGL resource and stores the handle void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, - bool separable_program = false) { - OGLShader vert, geo, frag; - if (vert_shader) - vert.Create(vert_shader, GL_VERTEX_SHADER); - if (geo_shader) - geo.Create(geo_shader, GL_GEOMETRY_SHADER); - if (frag_shader) - frag.Create(frag_shader, GL_FRAGMENT_SHADER); - Create(separable_program, vert.handle, geo.handle, frag.handle); - } + bool separable_program = false); /// Deletes the internal OpenGL resource - void Release() { - if (handle == 0) - return; - glDeleteProgram(handle); - OpenGLState::GetCurState().ResetProgram(handle).Apply(); - handle = 0; - } + void Release(); GLuint handle = 0; }; @@ -178,20 +131,10 @@ public: } /// Creates a new internal OpenGL resource and stores the handle - void Create() { - if (handle != 0) - return; - glGenProgramPipelines(1, &handle); - } + void Create(); /// Deletes the internal OpenGL resource - void Release() { - if (handle == 0) - return; - glDeleteProgramPipelines(1, &handle); - OpenGLState::GetCurState().ResetPipeline(handle).Apply(); - 
handle = 0; - } + void Release(); GLuint handle = 0; }; @@ -213,20 +156,10 @@ public: } /// Creates a new internal OpenGL resource and stores the handle - void Create() { - if (handle != 0) - return; - glGenBuffers(1, &handle); - } + void Create(); /// Deletes the internal OpenGL resource - void Release() { - if (handle == 0) - return; - glDeleteBuffers(1, &handle); - OpenGLState::GetCurState().ResetBuffer(handle).Apply(); - handle = 0; - } + void Release(); GLuint handle = 0; }; @@ -247,19 +180,10 @@ public: } /// Creates a new internal OpenGL resource and stores the handle - void Create() { - if (handle != 0) - return; - handle = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - } + void Create(); /// Deletes the internal OpenGL resource - void Release() { - if (handle == 0) - return; - glDeleteSync(handle); - handle = 0; - } + void Release(); GLsync handle = 0; }; @@ -281,20 +205,10 @@ public: } /// Creates a new internal OpenGL resource and stores the handle - void Create() { - if (handle != 0) - return; - glGenVertexArrays(1, &handle); - } + void Create(); /// Deletes the internal OpenGL resource - void Release() { - if (handle == 0) - return; - glDeleteVertexArrays(1, &handle); - OpenGLState::GetCurState().ResetVertexArray(handle).Apply(); - handle = 0; - } + void Release(); GLuint handle = 0; }; @@ -316,20 +230,10 @@ public: } /// Creates a new internal OpenGL resource and stores the handle - void Create() { - if (handle != 0) - return; - glGenFramebuffers(1, &handle); - } + void Create(); /// Deletes the internal OpenGL resource - void Release() { - if (handle == 0) - return; - glDeleteFramebuffers(1, &handle); - OpenGLState::GetCurState().ResetFramebuffer(handle).Apply(); - handle = 0; - } + void Release(); GLuint handle = 0; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 1a03a677f..a85a7c0c5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ 
b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -6,9 +6,10 @@ #include "core/core.h" #include "core/memory.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_manager.h" -#include "video_core/utils.h" +#include "video_core/renderer_opengl/utils.h" namespace OpenGL { @@ -89,7 +90,7 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) shader.Create(program_result.first.c_str(), gl_type); program.Create(true, shader.handle); SetShaderUniformBlockBindings(program.handle); - VideoCore::LabelGLObject(GL_PROGRAM, program.handle, addr); + LabelGLObject(GL_PROGRAM, program.handle, addr); } else { // Store shader's code to lazily build it on draw geometry_programs.code = program_result.first; @@ -120,20 +121,26 @@ GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) { } GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, - const std::string& glsl_topology, + const std::string& glsl_topology, u32 max_vertices, const std::string& debug_name) { if (target_program.handle != 0) { return target_program.handle; } - const std::string source{geometry_programs.code + "layout (" + glsl_topology + ") in;\n"}; + std::string source = "#version 430 core\n"; + source += "layout (" + glsl_topology + ") in;\n"; + source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; + source += geometry_programs.code; + OGLShader shader; shader.Create(source.c_str(), GL_GEOMETRY_SHADER); target_program.Create(true, shader.handle); SetShaderUniformBlockBindings(target_program.handle); - VideoCore::LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name); + LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name); return target_program.handle; }; +ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {} 
+ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { const VAddr program_addr{GetShaderAddress(program)}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index a210f1731..ffbf21831 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -16,6 +16,8 @@ namespace OpenGL { class CachedShader; +class RasterizerOpenGL; + using Shader = std::shared_ptr<CachedShader>; using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -46,22 +48,23 @@ public: } switch (primitive_mode) { case GL_POINTS: - return LazyGeometryProgram(geometry_programs.points, "points", "ShaderPoints"); + return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints"); case GL_LINES: case GL_LINE_STRIP: - return LazyGeometryProgram(geometry_programs.lines, "lines", "ShaderLines"); + return LazyGeometryProgram(geometry_programs.lines, "lines", 2, "ShaderLines"); case GL_LINES_ADJACENCY: case GL_LINE_STRIP_ADJACENCY: - return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency", + return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency", 4, "ShaderLinesAdjacency"); case GL_TRIANGLES: case GL_TRIANGLE_STRIP: case GL_TRIANGLE_FAN: - return LazyGeometryProgram(geometry_programs.triangles, "triangles", "ShaderTriangles"); + return LazyGeometryProgram(geometry_programs.triangles, "triangles", 3, + "ShaderTriangles"); case GL_TRIANGLES_ADJACENCY: case GL_TRIANGLE_STRIP_ADJACENCY: return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency", - "ShaderLines"); + 6, "ShaderTrianglesAdjacency"); default: UNREACHABLE_MSG("Unknown primitive mode."); } @@ -76,7 +79,7 @@ public: private: /// Generates a geometry shader or returns one that already exists. 
GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology, - const std::string& debug_name); + u32 max_vertices, const std::string& debug_name); VAddr addr; Maxwell::ShaderProgram program_type; @@ -104,6 +107,8 @@ private: class ShaderCacheOpenGL final : public RasterizerCache<Shader> { public: + explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer); + /// Gets the current specified shader stage program Shader GetStageProgram(Maxwell::ShaderProgram program); }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index fe4d1bd83..5fde22ad4 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -3,11 +3,12 @@ // Refer to the license.txt file included. #include <map> +#include <optional> #include <set> #include <string> #include <string_view> +#include <unordered_set> -#include <boost/optional.hpp> #include <fmt/format.h> #include "common/assert.h" @@ -143,7 +144,7 @@ private: for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) { const Instruction instr = {program_code[offset]}; if (const auto opcode = OpCode::Decode(instr)) { - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::EXIT: { // The EXIT instruction can be predicated, which means that the shader can // conditionally end on this instruction. 
We have to consider the case where the @@ -276,7 +277,8 @@ public: GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations, const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix, const Tegra::Shader::Header& header) - : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header} { + : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header}, + fixed_pipeline_output_attributes_used{}, local_memory_size{0} { BuildRegisterList(); BuildInputList(); } @@ -339,10 +341,10 @@ public: */ void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, u64 dest_num_components, u64 value_num_components, - bool is_saturated = false, u64 dest_elem = 0) { + bool is_saturated = false, u64 dest_elem = 0, bool precise = false) { SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value, - dest_num_components, value_num_components, dest_elem); + dest_num_components, value_num_components, dest_elem, precise); } /** @@ -366,11 +368,12 @@ public: const std::string func{is_signed ? 
"intBitsToFloat" : "uintBitsToFloat"}; SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', - dest_num_components, value_num_components, dest_elem); + dest_num_components, value_num_components, dest_elem, false); if (sets_cc) { const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; SetInternalFlag(InternalFlag::ZeroFlag, zero_condition); + LOG_WARNING(HW_GPU, "Control Codes Imcomplete."); } } @@ -414,7 +417,7 @@ public: } }(); - SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem); + SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false); } /** @@ -428,12 +431,31 @@ public: */ void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute, const Tegra::Shader::IpaMode& input_mode, - boost::optional<Register> vertex = {}) { + std::optional<Register> vertex = {}) { const std::string dest = GetRegisterAsFloat(reg); const std::string src = GetInputAttribute(attribute, input_mode, vertex) + GetSwizzle(elem); shader.AddLine(dest + " = " + src + ';'); } + std::string GetLocalMemoryAsFloat(const std::string& index) { + return "lmem[" + index + ']'; + } + + std::string GetLocalMemoryAsInteger(const std::string& index, bool is_signed = false) { + const std::string func{is_signed ? "floatToIntBits" : "floatBitsToUint"}; + return func + "(lmem[" + index + "])"; + } + + void SetLocalMemoryAsFloat(const std::string& index, const std::string& value) { + shader.AddLine("lmem[" + index + "] = " + value + ';'); + } + + void SetLocalMemoryAsInteger(const std::string& index, const std::string& value, + bool is_signed = false) { + const std::string func{is_signed ? 
"intBitsToFloat" : "uintBitsToFloat"}; + shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");"); + } + std::string GetControlCode(const Tegra::Shader::ControlCode cc) const { switch (cc) { case Tegra::Shader::ControlCode::NEU: @@ -472,15 +494,20 @@ public: // instruction for now. if (stage == Maxwell3D::Regs::ShaderStage::Geometry) { // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry - // shader. These instructions use a dirty register as buffer index. To avoid some - // drivers from complaining for the out of boundary writes, guard them. - const std::string buf_index{"min(" + GetRegisterAsInteger(buf_reg) + ", " + - std::to_string(MAX_GEOMETRY_BUFFERS - 1) + ')'}; + // shader. These instructions use a dirty register as buffer index, to avoid some + // drivers from complaining about out of boundary writes, guard them. + const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " + + std::to_string(MAX_GEOMETRY_BUFFERS) + ')'}; shader.AddLine("amem[" + buf_index + "][" + std::to_string(static_cast<u32>(attribute)) + ']' + GetSwizzle(elem) + " = " + src + ';'); } else { - shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); + if (attribute == Attribute::Index::PointSize) { + fixed_pipeline_output_attributes_used.insert(attribute); + shader.AddLine(dest + " = " + src + ';'); + } else { + shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); + } } } } @@ -524,7 +551,9 @@ public: /// Add declarations. void GenerateDeclarations(const std::string& suffix) { + GenerateVertex(); GenerateRegisters(suffix); + GenerateLocalMemory(); GenerateInternalFlags(); GenerateInputAttrs(); GenerateOutputAttrs(); @@ -570,6 +599,10 @@ public: return entry.GetName(); } + void SetLocalMemory(u64 lmem) { + local_memory_size = lmem; + } + private: /// Generates declarations for registers. 
void GenerateRegisters(const std::string& suffix) { @@ -580,6 +613,15 @@ private: declarations.AddNewLine(); } + /// Generates declarations for local memory. + void GenerateLocalMemory() { + if (local_memory_size > 0) { + declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) + + "];"); + declarations.AddNewLine(); + } + } + /// Generates declarations for internal flags. void GenerateInternalFlags() { for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) { @@ -683,6 +725,20 @@ private: declarations.AddNewLine(); } + void GenerateVertex() { + if (stage != Maxwell3D::Regs::ShaderStage::Vertex) + return; + declarations.AddLine("out gl_PerVertex {"); + ++declarations.scope; + declarations.AddLine("vec4 gl_Position;"); + for (auto& o : fixed_pipeline_output_attributes_used) { + if (o == Attribute::Index::PointSize) + declarations.AddLine("float gl_PointSize;"); + } + --declarations.scope; + declarations.AddLine("};"); + } + /// Generates code representing a temporary (GPR) register. std::string GetRegister(const Register& reg, unsigned elem) { if (reg == Register::ZeroIndex) { @@ -702,7 +758,8 @@ private: * @param dest_elem Optional, the destination element to use for the operation. */ void SetRegister(const Register& reg, u64 elem, const std::string& value, - u64 dest_num_components, u64 value_num_components, u64 dest_elem) { + u64 dest_num_components, u64 value_num_components, u64 dest_elem, + bool precise) { if (reg == Register::ZeroIndex) { LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex"); UNREACHABLE(); @@ -719,7 +776,18 @@ private: src += GetSwizzle(elem); } - shader.AddLine(dest + " = " + src + ';'); + if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) { + shader.AddLine('{'); + ++shader.scope; + // This avoids optimizations of constant propagation and keeps the code as the original + // Sadly using the precise keyword causes "linking" errors on fragment shaders. 
+ shader.AddLine("precise float tmp = " + src + ';'); + shader.AddLine(dest + " = tmp;"); + --shader.scope; + shader.AddLine('}'); + } else { + shader.AddLine(dest + " = " + src + ';'); + } } /// Build the GLSL register list. @@ -740,10 +808,14 @@ private: /// Generates code representing an input attribute register. std::string GetInputAttribute(Attribute::Index attribute, const Tegra::Shader::IpaMode& input_mode, - boost::optional<Register> vertex = {}) { + std::optional<Register> vertex = {}) { auto GeometryPass = [&](const std::string& name) { if (stage == Maxwell3D::Regs::ShaderStage::Geometry && vertex) { - return "gs_" + name + '[' + GetRegisterAsInteger(vertex.value(), 0, false) + ']'; + // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games set + // an 0x80000000 index for those and the shader fails to build. Find out why this + // happens and what's its intent. + return "gs_" + name + '[' + GetRegisterAsInteger(*vertex, 0, false) + + " % MAX_VERTEX_INPUT]"; } return name; }; @@ -836,6 +908,8 @@ private: /// Generates code representing the declaration name of an output attribute register. 
std::string GetOutputAttribute(Attribute::Index attribute) { switch (attribute) { + case Attribute::Index::PointSize: + return "gl_PointSize"; case Attribute::Index::Position: return "position"; default: @@ -870,6 +944,8 @@ private: const Maxwell3D::Regs::ShaderStage& stage; const std::string& suffix; const Tegra::Shader::Header& header; + std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used; + u64 local_memory_size; }; class GLSLGenerator { @@ -879,6 +955,8 @@ public: : subroutines(subroutines), program_code(program_code), main_offset(main_offset), stage(stage), suffix(suffix) { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); + local_memory_size = header.GetLocalMemorySize(); + regs.SetLocalMemory(local_memory_size); Generate(suffix); } @@ -1392,7 +1470,7 @@ private: } shader.AddLine( - fmt::format("// {}: {} (0x{:016x})", offset, opcode->GetName(), instr.value)); + fmt::format("// {}: {} (0x{:016x})", offset, opcode->get().GetName(), instr.value)); using Tegra::Shader::Pred; ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute, @@ -1400,7 +1478,7 @@ private: // Some instructions (like SSY) don't have a predicate field, they are always // unconditionally executed. - bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->GetId()); + bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { shader.AddLine("if (" + @@ -1410,7 +1488,7 @@ private: ++shader.scope; } - switch (opcode->GetType()) { + switch (opcode->get().GetType()) { case OpCode::Type::Arithmetic: { std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); @@ -1427,7 +1505,7 @@ private: } } - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::MOV_C: case OpCode::Id::MOV_R: { // MOV does not have neither 'abs' nor 'neg' bits. 
@@ -1449,8 +1527,13 @@ private: ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented"); op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); + regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, true); + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "FMUL Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } case OpCode::Id::FADD_C: @@ -1458,8 +1541,13 @@ private: case OpCode::Id::FADD_IMM: { op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); + regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, true); + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "FADD Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } case OpCode::Id::MUFU: { @@ -1467,31 +1555,31 @@ private: switch (instr.sub_op) { case SubOp::Cos: regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, true); break; case SubOp::Sin: regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, true); break; case SubOp::Ex2: regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, true); break; case SubOp::Lg2: regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, true); break; case SubOp::Rcp: regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, true); break; case SubOp::Rsq: regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, true); break; case SubOp::Sqrt: regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, 
true); break; default: LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", @@ -1512,7 +1600,11 @@ private: regs.SetRegisterToFloat(instr.gpr0, 0, '(' + condition + ") ? min(" + parameters + ") : max(" + parameters + ')', - 1, 1); + 1, 1, false, 0, true); + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "FMNMX Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } case OpCode::Id::RRO_C: @@ -1525,14 +1617,15 @@ private: break; } default: { - LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {}", + opcode->get().GetName()); UNREACHABLE(); } } break; } case OpCode::Type::ArithmeticImmediate: { - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::MOV32_IMM: { regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1); break; @@ -1541,7 +1634,11 @@ private: regs.SetRegisterToFloat(instr.gpr0, 0, regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), - 1, 1, instr.fmul32.saturate); + 1, 1, instr.fmul32.saturate, 0, true); + if (instr.op_32.generates_cc) { + LOG_CRITICAL(HW_GPU, "FMUL32 Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } case OpCode::Id::FADD32I: { @@ -1564,7 +1661,11 @@ private: op_b = "-(" + op_b + ')'; } - regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1); + regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true); + if (instr.op_32.generates_cc) { + LOG_CRITICAL(HW_GPU, "FADD32 Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } } @@ -1576,7 +1677,7 @@ private: std::string op_a = instr.bfe.negate_a ? 
"-" : ""; op_a += regs.GetRegisterAsInteger(instr.gpr8); - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::BFE_IMM: { std::string inner_shift = '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')'; @@ -1585,10 +1686,14 @@ private: std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')'; regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1); + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "BFE Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } default: { - LOG_CRITICAL(HW_GPU, "Unhandled BFE instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled BFE instruction: {}", opcode->get().GetName()); UNREACHABLE(); } } @@ -1610,7 +1715,7 @@ private: } } - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::SHR_C: case OpCode::Id::SHR_R: case OpCode::Id::SHR_IMM: { @@ -1622,15 +1727,23 @@ private: // Cast to int is superfluous for arithmetic shift, it's only for a logical shift regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')', 1, 1); + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "SHR Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } case OpCode::Id::SHL_C: case OpCode::Id::SHL_R: case OpCode::Id::SHL_IMM: regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1); + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "SHL Generates an unhandled Control Code"); + UNREACHABLE(); + } break; default: { - LOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->get().GetName()); UNREACHABLE(); } } @@ -1640,13 +1753,17 @@ private: std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); std::string op_b = std::to_string(instr.alu.imm20_32.Value()); - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::IADD32I: if (instr.iadd32i.negate_a) op_a = "-(" + 
op_a + ')'; regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, instr.iadd32i.saturate != 0); + if (instr.op_32.generates_cc) { + LOG_CRITICAL(HW_GPU, "IADD32 Generates an unhandled Control Code"); + UNREACHABLE(); + } break; case OpCode::Id::LOP32I: { if (instr.alu.lop32i.invert_a) @@ -1658,11 +1775,15 @@ private: WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, Tegra::Shader::PredicateResultMode::None, Tegra::Shader::Pred::UnusedIndex); + if (instr.op_32.generates_cc) { + LOG_CRITICAL(HW_GPU, "LOP32I Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } default: { LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticIntegerImmediate instruction: {}", - opcode->GetName()); + opcode->get().GetName()); UNREACHABLE(); } } @@ -1682,7 +1803,7 @@ private: } } - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::IADD_C: case OpCode::Id::IADD_R: case OpCode::Id::IADD_IMM: { @@ -1694,6 +1815,10 @@ private: regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, instr.alu.saturate_d); + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "IADD Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } case OpCode::Id::IADD3_C: @@ -1718,7 +1843,7 @@ private: } }; - if (opcode->GetId() == OpCode::Id::IADD3_R) { + if (opcode->get().GetId() == OpCode::Id::IADD3_R) { apply_height(instr.iadd3.height_a, op_a); apply_height(instr.iadd3.height_b, op_b); apply_height(instr.iadd3.height_c, op_c); @@ -1734,7 +1859,7 @@ private: op_c = "-(" + op_c + ')'; std::string result; - if (opcode->GetId() == OpCode::Id::IADD3_R) { + if (opcode->get().GetId() == OpCode::Id::IADD3_R) { switch (instr.iadd3.mode) { case Tegra::Shader::IAdd3Mode::RightShift: // TODO(tech4me): According to @@ -1755,6 +1880,11 @@ private: } regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1); + + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "IADD3 Generates an unhandled Control Code"); + UNREACHABLE(); + } 
break; } case OpCode::Id::ISCADD_C: @@ -1770,6 +1900,10 @@ private: regs.SetRegisterToInteger(instr.gpr0, true, 0, "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1); + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "ISCADD Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } case OpCode::Id::POPC_C: @@ -1801,6 +1935,10 @@ private: WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b, instr.alu.lop.pred_result_mode, instr.alu.lop.pred48); + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "LOP Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } case OpCode::Id::LOP3_C: @@ -1809,13 +1947,17 @@ private: const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); std::string lut; - if (opcode->GetId() == OpCode::Id::LOP3_R) { + if (opcode->get().GetId() == OpCode::Id::LOP3_R) { lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')'; } else { lut = '(' + std::to_string(instr.alu.lop3.GetImmLut48()) + ')'; } WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut); + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "LOP3 Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } case OpCode::Id::IMNMX_C: @@ -1830,6 +1972,10 @@ private: '(' + condition + ") ? 
min(" + parameters + ") : max(" + parameters + ')', 1, 1); + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "IMNMX Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } case OpCode::Id::LEA_R2: @@ -1839,7 +1985,7 @@ private: case OpCode::Id::LEA_HI: { std::string op_c; - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::LEA_R2: { op_a = regs.GetRegisterAsInteger(instr.gpr20); op_b = regs.GetRegisterAsInteger(instr.gpr39); @@ -1884,7 +2030,8 @@ private: op_b = regs.GetRegisterAsInteger(instr.gpr8); op_a = std::to_string(instr.lea.imm.entry_a); op_c = std::to_string(instr.lea.imm.entry_b); - LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}", + opcode->get().GetName()); UNREACHABLE(); } } @@ -1899,7 +2046,7 @@ private: } default: { LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}", - opcode->GetName()); + opcode->get().GetName()); UNREACHABLE(); } } @@ -1907,20 +2054,21 @@ private: break; } case OpCode::Type::ArithmeticHalf: { - if (opcode->GetId() == OpCode::Id::HADD2_C || opcode->GetId() == OpCode::Id::HADD2_R) { + if (opcode->get().GetId() == OpCode::Id::HADD2_C || + opcode->get().GetId() == OpCode::Id::HADD2_R) { ASSERT_MSG(instr.alu_half.ftz == 0, "Unimplemented"); } const bool negate_a = - opcode->GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; + opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; const bool negate_b = - opcode->GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; + opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; const std::string op_a = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half.type_a, instr.alu_half.abs_a != 0, negate_a); std::string op_b; - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::HADD2_C: case OpCode::Id::HMUL2_C: op_b = 
regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, @@ -1938,7 +2086,7 @@ private: op_b = GetHalfFloat(op_b, instr.alu_half.type_b, instr.alu_half.abs_b != 0, negate_b); const std::string result = [&]() { - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::HADD2_C: case OpCode::Id::HADD2_R: return '(' + op_a + " + " + op_b + ')'; @@ -1946,7 +2094,8 @@ private: case OpCode::Id::HMUL2_R: return '(' + op_a + " * " + op_b + ')'; default: - LOG_CRITICAL(HW_GPU, "Unhandled half float instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled half float instruction: {}", + opcode->get().GetName()); UNREACHABLE(); return std::string("0"); } @@ -1957,7 +2106,7 @@ private: break; } case OpCode::Type::ArithmeticHalfImmediate: { - if (opcode->GetId() == OpCode::Id::HADD2_IMM) { + if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { ASSERT_MSG(instr.alu_half_imm.ftz == 0, "Unimplemented"); } else { ASSERT_MSG(instr.alu_half_imm.precision == Tegra::Shader::HalfPrecision::None, @@ -1971,7 +2120,7 @@ private: const std::string op_b = UnpackHalfImmediate(instr, true); const std::string result = [&]() { - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::HADD2_IMM: return op_a + " + " + op_b; case OpCode::Id::HMUL2_IMM: @@ -1997,7 +2146,7 @@ private: ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::FFMA_CR: { op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, GLSLRegister::Type::Float); @@ -2021,24 +2170,29 @@ private: break; } default: { - LOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {}", opcode->get().GetName()); UNREACHABLE(); } } - regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1, - instr.alu.saturate_d); + regs.SetRegisterToFloat(instr.gpr0, 
0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')', + 1, 1, instr.alu.saturate_d, 0, true); + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "FFMA Generates an unhandled Control Code"); + UNREACHABLE(); + } + break; } case OpCode::Type::Hfma2: { - if (opcode->GetId() == OpCode::Id::HFMA2_RR) { + if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { ASSERT_MSG(instr.hfma2.rr.precision == Tegra::Shader::HalfPrecision::None, "Unimplemented"); } else { ASSERT_MSG(instr.hfma2.precision == Tegra::Shader::HalfPrecision::None, "Unimplemented"); } - const bool saturate = opcode->GetId() == OpCode::Id::HFMA2_RR + const bool saturate = opcode->get().GetId() == OpCode::Id::HFMA2_RR ? instr.hfma2.rr.saturate != 0 : instr.hfma2.saturate != 0; @@ -2046,7 +2200,7 @@ private: GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hfma2.type_a); std::string op_b, op_c; - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::HFMA2_CR: op_b = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, GLSLRegister::Type::UnsignedInteger), @@ -2084,7 +2238,7 @@ private: break; } case OpCode::Type::Conversion: { - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::I2I_R: { ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); @@ -2132,6 +2286,11 @@ private: } regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); + + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "I2F Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } case OpCode::Id::F2F_R: { @@ -2170,6 +2329,11 @@ private: } regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d); + + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "F2F Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } case OpCode::Id::F2I_R: @@ -2219,17 +2383,22 @@ private: regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, 1, false, 0, instr.conversion.dest_size); + if (instr.generates_cc) { + 
LOG_CRITICAL(HW_GPU, "F2I Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } default: { - LOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", + opcode->get().GetName()); UNREACHABLE(); } } break; } case OpCode::Type::Memory: { - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::LD_A: { // Note: Shouldn't this be interp mode flat? As in no interpolation made. ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex, @@ -2299,6 +2468,39 @@ private: shader.AddLine("}"); break; } + case OpCode::Id::LD_L: { + // Add an extra scope and declare the index register inside to prevent + // overwriting it in case it is used as an output of the LD instruction. + shader.AddLine('{'); + ++shader.scope; + + std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + + std::to_string(instr.smem_imm.Value()) + ')'; + + shader.AddLine("uint index = (" + op + " / 4);"); + + const std::string op_a = regs.GetLocalMemoryAsFloat("index"); + + if (instr.ld_l.unknown != 1) { + LOG_CRITICAL(HW_GPU, "LD_L Unhandled mode: {}", + static_cast<unsigned>(instr.ld_l.unknown.Value())); + UNREACHABLE(); + } + + switch (instr.ldst_sl.type.Value()) { + case Tegra::Shader::StoreType::Bytes32: + regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); + break; + default: + LOG_CRITICAL(HW_GPU, "LD_L Unhandled type: {}", + static_cast<unsigned>(instr.ldst_sl.type.Value())); + UNREACHABLE(); + } + + --shader.scope; + shader.AddLine('}'); + break; + } case OpCode::Id::ST_A: { ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex, "Indirect attribute loads are not supported"); @@ -2327,6 +2529,37 @@ private: break; } + case OpCode::Id::ST_L: { + // Add an extra scope and declare the index register inside to prevent + // overwriting it in case it is used as an output of the LD instruction. 
+ shader.AddLine('{'); + ++shader.scope; + + std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + + std::to_string(instr.smem_imm.Value()) + ')'; + + shader.AddLine("uint index = (" + op + " / 4);"); + + if (instr.st_l.unknown != 0) { + LOG_CRITICAL(HW_GPU, "ST_L Unhandled mode: {}", + static_cast<unsigned>(instr.st_l.unknown.Value())); + UNREACHABLE(); + } + + switch (instr.ldst_sl.type.Value()) { + case Tegra::Shader::StoreType::Bytes32: + regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0)); + break; + default: + LOG_CRITICAL(HW_GPU, "ST_L Unhandled type: {}", + static_cast<unsigned>(instr.ldst_sl.type.Value())); + UNREACHABLE(); + } + + --shader.scope; + shader.AddLine('}'); + break; + } case OpCode::Id::TEX: { Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; std::string coord; @@ -2513,12 +2746,12 @@ private: } case 3: { if (is_array) { - UNIMPLEMENTED_MSG("3-coordinate arrays not fully implemented"); - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; - texture_type = Tegra::Shader::TextureType::Texture2D; - is_array = false; + const std::string index = regs.GetRegisterAsInteger(instr.gpr8); + const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); + const std::string z = regs.GetRegisterAsFloat(instr.gpr20); + coord = + "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index + ");"; } else { const std::string x = regs.GetRegisterAsFloat(instr.gpr8); const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); @@ -2548,7 +2781,11 @@ private: break; } case Tegra::Shader::TextureProcessMode::LZ: { - texture = "textureLod(" + sampler + ", coords, 0.0)"; + if (depth_compare && is_array) { + texture = "texture(" + sampler + ", coords)"; + } else { + texture = "textureLod(" + 
sampler + ", coords, 0.0)"; + } break; } case Tegra::Shader::TextureProcessMode::LL: { @@ -2809,7 +3046,7 @@ private: break; } default: { - LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->get().GetName()); UNREACHABLE(); } } @@ -2903,7 +3140,7 @@ private: instr.hsetp2.abs_a, instr.hsetp2.negate_a); const std::string op_b = [&]() { - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::HSETP2_R: return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), instr.hsetp2.type_b, instr.hsetp2.abs_a, @@ -2962,10 +3199,15 @@ private: regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1); } + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "PSET Generates an unhandled Control Code"); + UNREACHABLE(); + } + break; } case OpCode::Type::PredicateSetPredicate: { - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::PSETP: { const std::string op_a = GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); @@ -3011,7 +3253,8 @@ private: break; } default: { - LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}", + opcode->get().GetName()); UNREACHABLE(); } } @@ -3099,7 +3342,7 @@ private: instr.hset2.abs_a != 0, instr.hset2.negate_a != 0); const std::string op_b = [&]() { - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::HSET2_R: return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), instr.hset2.type_b, instr.hset2.abs_b != 0, @@ -3148,7 +3391,7 @@ private: const bool is_signed{instr.xmad.sign_a == 1}; bool is_merge{}; - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::XMAD_CR: { is_merge = instr.xmad.merge_56; op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, @@ -3177,7 +3420,7 @@ private: break; } default: { - LOG_CRITICAL(HW_GPU, 
"Unhandled XMAD instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled XMAD instruction: {}", opcode->get().GetName()); UNREACHABLE(); } } @@ -3226,15 +3469,25 @@ private: } regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1); + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "XMAD Generates an unhandled Control Code"); + UNREACHABLE(); + } break; } default: { - switch (opcode->GetId()) { + switch (opcode->get().GetId()) { case OpCode::Id::EXIT: { if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { EmitFragmentOutputsWrite(); } + const Tegra::Shader::ControlCode cc = instr.flow_control_code; + if (cc != Tegra::Shader::ControlCode::T) { + LOG_CRITICAL(HW_GPU, "EXIT Control Code used: {}", static_cast<u32>(cc)); + UNREACHABLE(); + } + switch (instr.flow.cond) { case Tegra::Shader::FlowCondition::Always: shader.AddLine("return true;"); @@ -3264,6 +3517,11 @@ private: // Enclose "discard" in a conditional, so that GLSL compilation does not complain // about unexecuted instructions that may follow this. 
+ const Tegra::Shader::ControlCode cc = instr.flow_control_code; + if (cc != Tegra::Shader::ControlCode::T) { + LOG_CRITICAL(HW_GPU, "KIL Control Code used: {}", static_cast<u32>(cc)); + UNREACHABLE(); + } shader.AddLine("if (true) {"); ++shader.scope; shader.AddLine("discard;"); @@ -3321,6 +3579,11 @@ private: case OpCode::Id::BRA: { ASSERT_MSG(instr.bra.constant_buffer == 0, "BRA with constant buffers are not implemented"); + const Tegra::Shader::ControlCode cc = instr.flow_control_code; + if (cc != Tegra::Shader::ControlCode::T) { + LOG_CRITICAL(HW_GPU, "BRA Control Code used: {}", static_cast<u32>(cc)); + UNREACHABLE(); + } const u32 target = offset + instr.bra.GetBranchTarget(); shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); break; @@ -3361,13 +3624,21 @@ private: } case OpCode::Id::SYNC: { // The SYNC opcode jumps to the address previously set by the SSY opcode - ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always); + const Tegra::Shader::ControlCode cc = instr.flow_control_code; + if (cc != Tegra::Shader::ControlCode::T) { + LOG_CRITICAL(HW_GPU, "SYNC Control Code used: {}", static_cast<u32>(cc)); + UNREACHABLE(); + } EmitPopFromFlowStack(); break; } case OpCode::Id::BRK: { // The BRK opcode jumps to the address previously set by the PBK opcode - ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always); + const Tegra::Shader::ControlCode cc = instr.flow_control_code; + if (cc != Tegra::Shader::ControlCode::T) { + LOG_CRITICAL(HW_GPU, "BRK Control Code used: {}", static_cast<u32>(cc)); + UNREACHABLE(); + } EmitPopFromFlowStack(); break; } @@ -3397,6 +3668,11 @@ private: regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1, instr.vmad.saturate == 1, 0, Register::Size::Word, instr.vmad.cc); + if (instr.generates_cc) { + LOG_CRITICAL(HW_GPU, "VMAD Generates an unhandled Control Code"); + UNREACHABLE(); + } + break; } case OpCode::Id::VSETP: { @@ -3424,7 +3700,7 @@ private: break; } default: { - 
LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->get().GetName()); UNREACHABLE(); } } @@ -3550,6 +3826,7 @@ private: const u32 main_offset; Maxwell3D::Regs::ShaderStage stage; const std::string& suffix; + u64 local_memory_size; ShaderWriter shader; ShaderWriter declarations; @@ -3564,9 +3841,9 @@ std::string GetCommonDeclarations() { RasterizerOpenGL::MaxConstbufferSize / sizeof(GLvec4)); } -boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, - Maxwell3D::Regs::ShaderStage stage, - const std::string& suffix) { +std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, + Maxwell3D::Regs::ShaderStage stage, + const std::string& suffix) { try { const auto subroutines = ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines(); @@ -3575,7 +3852,7 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, } catch (const DecompileFail& exception) { LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what()); } - return boost::none; + return {}; } } // namespace OpenGL::GLShader::Decompiler diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index b20cc4bfa..d01a4a7ee 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -6,8 +6,8 @@ #include <array> #include <functional> +#include <optional> #include <string> -#include <boost/optional.hpp> #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_shader_gen.h" @@ -18,8 +18,8 @@ using Tegra::Engines::Maxwell3D; std::string GetCommonDeclarations(); -boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, - Maxwell3D::Regs::ShaderStage stage, - const std::string& 
suffix); +std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, + Maxwell3D::Regs::ShaderStage stage, + const std::string& suffix); } // namespace OpenGL::GLShader::Decompiler diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index e883ffb1d..eea090e52 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -19,9 +19,6 @@ ProgramResult GenerateVertexShader(const ShaderSetup& setup) { out += Decompiler::GetCommonDeclarations(); out += R"( -out gl_PerVertex { - vec4 gl_Position; -}; layout (location = 0) out vec4 position; @@ -40,7 +37,7 @@ layout(std140) uniform vs_config { ProgramResult program = Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, Maxwell3D::Regs::ShaderStage::Vertex, "vertex") - .get_value_or({}); + .value_or(ProgramResult()); out += program.first; @@ -48,7 +45,7 @@ layout(std140) uniform vs_config { ProgramResult program_b = Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b") - .get_value_or({}); + .value_or(ProgramResult()); out += program_b.first; } @@ -85,15 +82,15 @@ void main() { } ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { - std::string out = "#version 430 core\n"; - out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; + // Version is intentionally skipped in shader generation, it's added by the lazy compilation. 
+ std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += Decompiler::GetCommonDeclarations(); out += "bool exec_geometry();\n"; ProgramResult program = Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, Maxwell3D::Regs::ShaderStage::Geometry, "geometry") - .get_value_or({}); + .value_or(ProgramResult()); out += R"( out gl_PerVertex { vec4 gl_Position; @@ -127,7 +124,7 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { ProgramResult program = Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, Maxwell3D::Regs::ShaderStage::Fragment, "fragment") - .get_value_or({}); + .value_or(ProgramResult()); out += R"( layout(location = 0) out vec4 FragColor0; layout(location = 1) out vec4 FragColor1; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 36fe1f04c..2a069cdd8 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -7,6 +7,7 @@ #include <glad/glad.h> #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" namespace OpenGL::GLShader { diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 1fe26a2a9..98622a058 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -11,9 +11,10 @@ namespace OpenGL { OpenGLState OpenGLState::cur_state; - +bool OpenGLState::s_rgb_used; OpenGLState::OpenGLState() { // These all match default OpenGL values + framebuffer_srgb.enabled = false; cull.enabled = false; cull.mode = GL_BACK; cull.front_face = GL_CCW; @@ -22,11 +23,14 @@ OpenGLState::OpenGLState() { depth.test_func = GL_LESS; depth.write_mask = GL_TRUE; - color_mask.red_enabled = GL_TRUE; - color_mask.green_enabled = GL_TRUE; - color_mask.blue_enabled = GL_TRUE; 
- color_mask.alpha_enabled = GL_TRUE; - + primitive_restart.enabled = false; + primitive_restart.index = 0; + for (auto& item : color_mask) { + item.red_enabled = GL_TRUE; + item.green_enabled = GL_TRUE; + item.blue_enabled = GL_TRUE; + item.alpha_enabled = GL_TRUE; + } stencil.test_enabled = false; auto reset_stencil = [](auto& config) { config.test_func = GL_ALWAYS; @@ -39,19 +43,33 @@ OpenGLState::OpenGLState() { }; reset_stencil(stencil.front); reset_stencil(stencil.back); - - blend.enabled = true; - blend.rgb_equation = GL_FUNC_ADD; - blend.a_equation = GL_FUNC_ADD; - blend.src_rgb_func = GL_ONE; - blend.dst_rgb_func = GL_ZERO; - blend.src_a_func = GL_ONE; - blend.dst_a_func = GL_ZERO; - blend.color.red = 0.0f; - blend.color.green = 0.0f; - blend.color.blue = 0.0f; - blend.color.alpha = 0.0f; - + for (auto& item : viewports) { + item.x = 0; + item.y = 0; + item.width = 0; + item.height = 0; + item.depth_range_near = 0.0f; + item.depth_range_far = 1.0f; + } + scissor.enabled = false; + scissor.x = 0; + scissor.y = 0; + scissor.width = 0; + scissor.height = 0; + for (auto& item : blend) { + item.enabled = true; + item.rgb_equation = GL_FUNC_ADD; + item.a_equation = GL_FUNC_ADD; + item.src_rgb_func = GL_ONE; + item.dst_rgb_func = GL_ZERO; + item.src_a_func = GL_ONE; + item.dst_a_func = GL_ZERO; + } + independant_blend.enabled = false; + blend_color.red = 0.0f; + blend_color.green = 0.0f; + blend_color.blue = 0.0f; + blend_color.alpha = 0.0f; logic_op.enabled = false; logic_op.operation = GL_COPY; @@ -67,138 +85,309 @@ OpenGLState::OpenGLState() { draw.shader_program = 0; draw.program_pipeline = 0; - scissor.enabled = false; - scissor.x = 0; - scissor.y = 0; - scissor.width = 0; - scissor.height = 0; - - viewport.x = 0; - viewport.y = 0; - viewport.width = 0; - viewport.height = 0; - clip_distance = {}; point.size = 1; } -void OpenGLState::Apply() const { +void OpenGLState::ApplyDefaultState() { + glDisable(GL_FRAMEBUFFER_SRGB); + glDisable(GL_CULL_FACE); + 
glDisable(GL_DEPTH_TEST); + glDisable(GL_PRIMITIVE_RESTART); + glDisable(GL_STENCIL_TEST); + glEnable(GL_BLEND); + glDisable(GL_COLOR_LOGIC_OP); + glDisable(GL_SCISSOR_TEST); +} + +void OpenGLState::ApplySRgb() const { + // sRGB + if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) { + if (framebuffer_srgb.enabled) { + // Track if sRGB is used + s_rgb_used = true; + glEnable(GL_FRAMEBUFFER_SRGB); + } else { + glDisable(GL_FRAMEBUFFER_SRGB); + } + } +} + +void OpenGLState::ApplyCulling() const { // Culling - if (cull.enabled != cur_state.cull.enabled) { + const bool cull_changed = cull.enabled != cur_state.cull.enabled; + if (cull_changed) { if (cull.enabled) { glEnable(GL_CULL_FACE); } else { glDisable(GL_CULL_FACE); } } + if (cull.enabled) { + if (cull_changed || cull.mode != cur_state.cull.mode) { + glCullFace(cull.mode); + } - if (cull.mode != cur_state.cull.mode) { - glCullFace(cull.mode); + if (cull_changed || cull.front_face != cur_state.cull.front_face) { + glFrontFace(cull.front_face); + } } +} - if (cull.front_face != cur_state.cull.front_face) { - glFrontFace(cull.front_face); +void OpenGLState::ApplyColorMask() const { + if (GLAD_GL_ARB_viewport_array) { + for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { + const auto& updated = color_mask[i]; + const auto& current = cur_state.color_mask[i]; + if (updated.red_enabled != current.red_enabled || + updated.green_enabled != current.green_enabled || + updated.blue_enabled != current.blue_enabled || + updated.alpha_enabled != current.alpha_enabled) { + glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled, + updated.blue_enabled, updated.alpha_enabled); + } + } + } else { + const auto& updated = color_mask[0]; + const auto& current = cur_state.color_mask[0]; + if (updated.red_enabled != current.red_enabled || + updated.green_enabled != current.green_enabled || + updated.blue_enabled != current.blue_enabled || + updated.alpha_enabled != 
current.alpha_enabled) { + glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled, + updated.alpha_enabled); + } } +} +void OpenGLState::ApplyDepth() const { // Depth test - if (depth.test_enabled != cur_state.depth.test_enabled) { + const bool depth_test_changed = depth.test_enabled != cur_state.depth.test_enabled; + if (depth_test_changed) { if (depth.test_enabled) { glEnable(GL_DEPTH_TEST); } else { glDisable(GL_DEPTH_TEST); } } - - if (depth.test_func != cur_state.depth.test_func) { + if (depth.test_enabled && + (depth_test_changed || depth.test_func != cur_state.depth.test_func)) { glDepthFunc(depth.test_func); } - // Depth mask if (depth.write_mask != cur_state.depth.write_mask) { glDepthMask(depth.write_mask); } +} - // Color mask - if (color_mask.red_enabled != cur_state.color_mask.red_enabled || - color_mask.green_enabled != cur_state.color_mask.green_enabled || - color_mask.blue_enabled != cur_state.color_mask.blue_enabled || - color_mask.alpha_enabled != cur_state.color_mask.alpha_enabled) { - glColorMask(color_mask.red_enabled, color_mask.green_enabled, color_mask.blue_enabled, - color_mask.alpha_enabled); +void OpenGLState::ApplyPrimitiveRestart() const { + const bool primitive_restart_changed = + primitive_restart.enabled != cur_state.primitive_restart.enabled; + if (primitive_restart_changed) { + if (primitive_restart.enabled) { + glEnable(GL_PRIMITIVE_RESTART); + } else { + glDisable(GL_PRIMITIVE_RESTART); + } + } + if (primitive_restart_changed || + (primitive_restart.enabled && + primitive_restart.index != cur_state.primitive_restart.index)) { + glPrimitiveRestartIndex(primitive_restart.index); } +} - // Stencil test - if (stencil.test_enabled != cur_state.stencil.test_enabled) { +void OpenGLState::ApplyStencilTest() const { + const bool stencil_test_changed = stencil.test_enabled != cur_state.stencil.test_enabled; + if (stencil_test_changed) { if (stencil.test_enabled) { glEnable(GL_STENCIL_TEST); } else { 
glDisable(GL_STENCIL_TEST); } } - auto config_stencil = [](GLenum face, const auto& config, const auto& prev_config) { - if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref || - config.test_mask != prev_config.test_mask) { - glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); + if (stencil.test_enabled) { + auto config_stencil = [stencil_test_changed](GLenum face, const auto& config, + const auto& prev_config) { + if (stencil_test_changed || config.test_func != prev_config.test_func || + config.test_ref != prev_config.test_ref || + config.test_mask != prev_config.test_mask) { + glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); + } + if (stencil_test_changed || config.action_depth_fail != prev_config.action_depth_fail || + config.action_depth_pass != prev_config.action_depth_pass || + config.action_stencil_fail != prev_config.action_stencil_fail) { + glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, + config.action_depth_pass); + } + if (config.write_mask != prev_config.write_mask) { + glStencilMaskSeparate(face, config.write_mask); + } + }; + config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front); + config_stencil(GL_BACK, stencil.back, cur_state.stencil.back); + } +} + +void OpenGLState::ApplyScissor() const { + const bool scissor_changed = scissor.enabled != cur_state.scissor.enabled; + if (scissor_changed) { + if (scissor.enabled) { + glEnable(GL_SCISSOR_TEST); + } else { + glDisable(GL_SCISSOR_TEST); } - if (config.action_depth_fail != prev_config.action_depth_fail || - config.action_depth_pass != prev_config.action_depth_pass || - config.action_stencil_fail != prev_config.action_stencil_fail) { - glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, - config.action_depth_pass); + } + if (scissor.enabled && + (scissor_changed || scissor.x != cur_state.scissor.x || scissor.y != cur_state.scissor.y || 
+ scissor.width != cur_state.scissor.width || scissor.height != cur_state.scissor.height)) { + glScissor(scissor.x, scissor.y, scissor.width, scissor.height); + } +} + +void OpenGLState::ApplyViewport() const { + if (GLAD_GL_ARB_viewport_array) { + for (GLuint i = 0; + i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); i++) { + const auto& current = cur_state.viewports[i]; + const auto& updated = viewports[i]; + if (updated.x != current.x || updated.y != current.y || + updated.width != current.width || updated.height != current.height) { + glViewportIndexedf(i, updated.x, updated.y, updated.width, updated.height); + } + if (updated.depth_range_near != current.depth_range_near || + updated.depth_range_far != current.depth_range_far) { + glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); + } } - if (config.write_mask != prev_config.write_mask) { - glStencilMaskSeparate(face, config.write_mask); + } else { + const auto& current = cur_state.viewports[0]; + const auto& updated = viewports[0]; + if (updated.x != current.x || updated.y != current.y || updated.width != current.width || + updated.height != current.height) { + glViewport(static_cast<GLint>(updated.x), static_cast<GLint>(updated.y), + static_cast<GLsizei>(updated.width), static_cast<GLsizei>(updated.height)); } - }; - config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front); - config_stencil(GL_BACK, stencil.back, cur_state.stencil.back); + if (updated.depth_range_near != current.depth_range_near || + updated.depth_range_far != current.depth_range_far) { + glDepthRange(updated.depth_range_near, updated.depth_range_far); + } + } +} - // Blending - if (blend.enabled != cur_state.blend.enabled) { - if (blend.enabled) { - ASSERT(!logic_op.enabled); +void OpenGLState::ApplyGlobalBlending() const { + const Blend& current = cur_state.blend[0]; + const Blend& updated = blend[0]; + const bool blend_changed = updated.enabled != current.enabled; + if 
(blend_changed) { + if (updated.enabled) { glEnable(GL_BLEND); } else { glDisable(GL_BLEND); } } + if (!updated.enabled) { + return; + } + if (updated.separate_alpha) { + if (blend_changed || updated.src_rgb_func != current.src_rgb_func || + updated.dst_rgb_func != current.dst_rgb_func || + updated.src_a_func != current.src_a_func || updated.dst_a_func != current.dst_a_func) { + glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, + updated.dst_a_func); + } + + if (blend_changed || updated.rgb_equation != current.rgb_equation || + updated.a_equation != current.a_equation) { + glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); + } + } else { + if (blend_changed || updated.src_rgb_func != current.src_rgb_func || + updated.dst_rgb_func != current.dst_rgb_func) { + glBlendFunc(updated.src_rgb_func, updated.dst_rgb_func); + } + + if (blend_changed || updated.rgb_equation != current.rgb_equation) { + glBlendEquation(updated.rgb_equation); + } + } +} - if (blend.color.red != cur_state.blend.color.red || - blend.color.green != cur_state.blend.color.green || - blend.color.blue != cur_state.blend.color.blue || - blend.color.alpha != cur_state.blend.color.alpha) { - glBlendColor(blend.color.red, blend.color.green, blend.color.blue, blend.color.alpha); +void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { + const Blend& updated = blend[target]; + const Blend& current = cur_state.blend[target]; + const bool blend_changed = updated.enabled != current.enabled || force; + if (blend_changed) { + if (updated.enabled) { + glEnablei(GL_BLEND, static_cast<GLuint>(target)); + } else { + glDisablei(GL_BLEND, static_cast<GLuint>(target)); + } + } + if (!updated.enabled) { + return; } + if (updated.separate_alpha) { + if (blend_changed || updated.src_rgb_func != current.src_rgb_func || + updated.dst_rgb_func != current.dst_rgb_func || + updated.src_a_func != current.src_a_func || updated.dst_a_func != 
current.dst_a_func) { + glBlendFuncSeparateiARB(static_cast<GLuint>(target), updated.src_rgb_func, + updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); + } + + if (blend_changed || updated.rgb_equation != current.rgb_equation || + updated.a_equation != current.a_equation) { + glBlendEquationSeparateiARB(static_cast<GLuint>(target), updated.rgb_equation, + updated.a_equation); + } + } else { + if (blend_changed || updated.src_rgb_func != current.src_rgb_func || + updated.dst_rgb_func != current.dst_rgb_func) { + glBlendFunciARB(static_cast<GLuint>(target), updated.src_rgb_func, + updated.dst_rgb_func); + } - if (blend.src_rgb_func != cur_state.blend.src_rgb_func || - blend.dst_rgb_func != cur_state.blend.dst_rgb_func || - blend.src_a_func != cur_state.blend.src_a_func || - blend.dst_a_func != cur_state.blend.dst_a_func) { - glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func, - blend.dst_a_func); + if (blend_changed || updated.rgb_equation != current.rgb_equation) { + glBlendEquationiARB(static_cast<GLuint>(target), updated.rgb_equation); + } } +} - if (blend.rgb_equation != cur_state.blend.rgb_equation || - blend.a_equation != cur_state.blend.a_equation) { - glBlendEquationSeparate(blend.rgb_equation, blend.a_equation); +void OpenGLState::ApplyBlending() const { + if (independant_blend.enabled) { + for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { + ApplyTargetBlending(i, + independant_blend.enabled != cur_state.independant_blend.enabled); + } + } else { + ApplyGlobalBlending(); + } + if (blend_color.red != cur_state.blend_color.red || + blend_color.green != cur_state.blend_color.green || + blend_color.blue != cur_state.blend_color.blue || + blend_color.alpha != cur_state.blend_color.alpha) { + glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha); } +} - // Logic Operation - if (logic_op.enabled != cur_state.logic_op.enabled) { +void OpenGLState::ApplyLogicOp() 
const { + const bool logic_op_changed = logic_op.enabled != cur_state.logic_op.enabled; + if (logic_op_changed) { if (logic_op.enabled) { - ASSERT(!blend.enabled); glEnable(GL_COLOR_LOGIC_OP); } else { glDisable(GL_COLOR_LOGIC_OP); } } - if (logic_op.operation != cur_state.logic_op.operation) { + if (logic_op.enabled && + (logic_op_changed || logic_op.operation != cur_state.logic_op.operation)) { glLogicOp(logic_op.operation); } +} - // Textures +void OpenGLState::ApplyTextures() const { for (std::size_t i = 0; i < std::size(texture_units); ++i) { const auto& texture_unit = texture_units[i]; const auto& cur_state_texture_unit = cur_state.texture_units[i]; @@ -217,28 +406,29 @@ void OpenGLState::Apply() const { glTexParameteriv(texture_unit.target, GL_TEXTURE_SWIZZLE_RGBA, mask.data()); } } +} - // Samplers - { - bool has_delta{}; - std::size_t first{}, last{}; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers; - for (std::size_t i = 0; i < std::size(samplers); ++i) { - samplers[i] = texture_units[i].sampler; - if (samplers[i] != cur_state.texture_units[i].sampler) { - if (!has_delta) { - first = i; - has_delta = true; - } - last = i; +void OpenGLState::ApplySamplers() const { + bool has_delta{}; + std::size_t first{}, last{}; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers; + for (std::size_t i = 0; i < std::size(samplers); ++i) { + samplers[i] = texture_units[i].sampler; + if (samplers[i] != cur_state.texture_units[i].sampler) { + if (!has_delta) { + first = i; + has_delta = true; } + last = i; } - if (has_delta) { - glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), - samplers.data()); - } } + if (has_delta) { + glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), + samplers.data()); + } +} +void OpenGLState::ApplyFramebufferState() const { // Framebuffer if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { 
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); @@ -246,7 +436,9 @@ void OpenGLState::Apply() const { if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) { glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer); } +} +void OpenGLState::ApplyVertexBufferState() const { // Vertex array if (draw.vertex_array != cur_state.draw.vertex_array) { glBindVertexArray(draw.vertex_array); @@ -256,7 +448,11 @@ void OpenGLState::Apply() const { if (draw.vertex_buffer != cur_state.draw.vertex_buffer) { glBindBuffer(GL_ARRAY_BUFFER, draw.vertex_buffer); } +} +void OpenGLState::Apply() const { + ApplyFramebufferState(); + ApplyVertexBufferState(); // Uniform buffer if (draw.uniform_buffer != cur_state.draw.uniform_buffer) { glBindBuffer(GL_UNIFORM_BUFFER, draw.uniform_buffer); @@ -271,27 +467,6 @@ void OpenGLState::Apply() const { if (draw.program_pipeline != cur_state.draw.program_pipeline) { glBindProgramPipeline(draw.program_pipeline); } - - // Scissor test - if (scissor.enabled != cur_state.scissor.enabled) { - if (scissor.enabled) { - glEnable(GL_SCISSOR_TEST); - } else { - glDisable(GL_SCISSOR_TEST); - } - } - - if (scissor.x != cur_state.scissor.x || scissor.y != cur_state.scissor.y || - scissor.width != cur_state.scissor.width || scissor.height != cur_state.scissor.height) { - glScissor(scissor.x, scissor.y, scissor.width, scissor.height); - } - - if (viewport.x != cur_state.viewport.x || viewport.y != cur_state.viewport.y || - viewport.width != cur_state.viewport.width || - viewport.height != cur_state.viewport.height) { - glViewport(viewport.x, viewport.y, viewport.width, viewport.height); - } - // Clip distance for (std::size_t i = 0; i < clip_distance.size(); ++i) { if (clip_distance[i] != cur_state.clip_distance[i]) { @@ -302,12 +477,22 @@ void OpenGLState::Apply() const { } } } - // Point if (point.size != cur_state.point.size) { glPointSize(point.size); } - + ApplyColorMask(); + ApplyViewport(); + ApplyScissor(); + ApplyStencilTest(); + 
ApplySRgb(); + ApplyCulling(); + ApplyDepth(); + ApplyPrimitiveRestart(); + ApplyBlending(); + ApplyLogicOp(); + ApplyTextures(); + ApplySamplers(); cur_state = *this; } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index dc21a2ee3..e5d1baae6 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -36,6 +36,10 @@ constexpr TextureUnit ProcTexDiffLUT{9}; class OpenGLState { public: struct { + bool enabled; // GL_FRAMEBUFFER_SRGB + } framebuffer_srgb; + + struct { bool enabled; // GL_CULL_FACE GLenum mode; // GL_CULL_FACE_MODE GLenum front_face; // GL_FRONT_FACE @@ -48,12 +52,18 @@ public: } depth; struct { + bool enabled; + GLuint index; + } primitive_restart; // GL_PRIMITIVE_RESTART + + struct ColorMask { GLboolean red_enabled; GLboolean green_enabled; GLboolean blue_enabled; GLboolean alpha_enabled; - } color_mask; // GL_COLOR_WRITEMASK - + }; + std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> + color_mask; // GL_COLOR_WRITEMASK struct { bool test_enabled; // GL_STENCIL_TEST struct { @@ -67,22 +77,28 @@ public: } front, back; } stencil; - struct { + struct Blend { bool enabled; // GL_BLEND + bool separate_alpha; // Independent blend enabled GLenum rgb_equation; // GL_BLEND_EQUATION_RGB GLenum a_equation; // GL_BLEND_EQUATION_ALPHA GLenum src_rgb_func; // GL_BLEND_SRC_RGB GLenum dst_rgb_func; // GL_BLEND_DST_RGB GLenum src_a_func; // GL_BLEND_SRC_ALPHA GLenum dst_a_func; // GL_BLEND_DST_ALPHA + }; + std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend; - struct { - GLclampf red; - GLclampf green; - GLclampf blue; - GLclampf alpha; - } color; // GL_BLEND_COLOR - } blend; + struct { + bool enabled; + } independant_blend; + + struct { + GLclampf red; + GLclampf green; + GLclampf blue; + GLclampf alpha; + } blend_color; // GL_BLEND_COLOR struct { bool enabled; // GL_LOGIC_OP_MODE @@ -127,6 +143,16 @@ public: GLuint 
program_pipeline; // GL_PROGRAM_PIPELINE_BINDING } draw; + struct viewport { + GLfloat x; + GLfloat y; + GLfloat width; + GLfloat height; + GLfloat depth_range_near; // GL_DEPTH_RANGE + GLfloat depth_range_far; // GL_DEPTH_RANGE + }; + std::array<viewport, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> viewports; + struct { bool enabled; // GL_SCISSOR_TEST GLint x; @@ -136,13 +162,6 @@ public: } scissor; struct { - GLint x; - GLint y; - GLsizei width; - GLsizei height; - } viewport; - - struct { float size; // GL_POINT_SIZE } point; @@ -154,10 +173,20 @@ public: static OpenGLState GetCurState() { return cur_state; } - + static bool GetsRGBUsed() { + return s_rgb_used; + } + static void ClearsRGBUsed() { + s_rgb_used = false; + } /// Apply this state as the current OpenGL state void Apply() const; - + /// Apply only the state afecting the framebuffer + void ApplyFramebufferState() const; + /// Apply only the state afecting the vertex buffer + void ApplyVertexBufferState() const; + /// Set the initial OpenGL state + static void ApplyDefaultState(); /// Resets any references to the given resource OpenGLState& UnbindTexture(GLuint handle); OpenGLState& ResetSampler(GLuint handle); @@ -169,6 +198,23 @@ public: private: static OpenGLState cur_state; + // Workaround for sRGB problems caused by + // QT not supporting srgb output + static bool s_rgb_used; + void ApplySRgb() const; + void ApplyCulling() const; + void ApplyColorMask() const; + void ApplyDepth() const; + void ApplyPrimitiveRestart() const; + void ApplyStencilTest() const; + void ApplyViewport() const; + void ApplyTargetBlending(std::size_t target, bool force) const; + void ApplyGlobalBlending() const; + void ApplyBlending() const; + void ApplyLogicOp() const; + void ApplyTextures() const; + void ApplySamplers() const; + void ApplyScissor() const; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 
e409228cc..b97b895a4 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -6,9 +6,13 @@ #include <vector> #include "common/alignment.h" #include "common/assert.h" +#include "common/microprofile.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" +MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", + MP_RGB(128, 128, 192)); + namespace OpenGL { OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent) @@ -75,6 +79,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a } if (invalidate || !persistent) { + MICROPROFILE_SCOPE(OpenGL_StreamBuffer); GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 0f6dcab2b..3ce2cc6d2 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -135,17 +135,32 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { return {}; } -inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode) { +inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, + Tegra::Texture::TextureMipmapFilter mip_filter_mode) { switch (filter_mode) { - case Tegra::Texture::TextureFilter::Linear: - return GL_LINEAR; - case Tegra::Texture::TextureFilter::Nearest: - return GL_NEAREST; + case Tegra::Texture::TextureFilter::Linear: { + switch (mip_filter_mode) { + case Tegra::Texture::TextureMipmapFilter::None: + return GL_LINEAR; + case Tegra::Texture::TextureMipmapFilter::Nearest: + return GL_NEAREST_MIPMAP_LINEAR; + case Tegra::Texture::TextureMipmapFilter::Linear: + 
return GL_LINEAR_MIPMAP_LINEAR; + } } - LOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode={}", - static_cast<u32>(filter_mode)); - UNREACHABLE(); - return {}; + case Tegra::Texture::TextureFilter::Nearest: { + switch (mip_filter_mode) { + case Tegra::Texture::TextureMipmapFilter::None: + return GL_NEAREST; + case Tegra::Texture::TextureMipmapFilter::Nearest: + return GL_NEAREST_MIPMAP_NEAREST; + case Tegra::Texture::TextureMipmapFilter::Linear: + return GL_LINEAR_MIPMAP_NEAREST; + } + } + } + LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode)); + return GL_LINEAR; } inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { @@ -166,9 +181,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: return GL_MIRROR_CLAMP_TO_EDGE; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); - UNREACHABLE(); - return {}; + LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); + return GL_REPEAT; } inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) { @@ -190,10 +204,9 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) { case Tegra::Texture::DepthCompareFunc::Always: return GL_ALWAYS; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented texture depth compare function ={}", - static_cast<u32>(func)); - UNREACHABLE(); - return {}; + LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}", + static_cast<u32>(func)); + return GL_GREATER; } inline GLenum BlendEquation(Maxwell::Blend::Equation equation) { @@ -209,9 +222,8 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) { case Maxwell::Blend::Equation::Max: return GL_MAX; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation)); - UNREACHABLE(); - return {}; + LOG_ERROR(Render_OpenGL, "Unimplemented blend equation={}", 
static_cast<u32>(equation)); + return GL_FUNC_ADD; } inline GLenum BlendFunc(Maxwell::Blend::Factor factor) { @@ -274,9 +286,8 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) { case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: return GL_ONE_MINUS_CONSTANT_ALPHA; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor)); - UNREACHABLE(); - return {}; + LOG_ERROR(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor)); + return GL_ZERO; } inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) { @@ -295,9 +306,8 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) { case Tegra::Texture::SwizzleSource::OneFloat: return GL_ONE; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source)); - UNREACHABLE(); - return {}; + LOG_ERROR(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source)); + return GL_ZERO; } inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) { @@ -327,33 +337,39 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) { case Maxwell::ComparisonOp::AlwaysOld: return GL_ALWAYS; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison)); - UNREACHABLE(); - return {}; + LOG_ERROR(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison)); + return GL_ALWAYS; } inline GLenum StencilOp(Maxwell::StencilOp stencil) { switch (stencil) { case Maxwell::StencilOp::Keep: + case Maxwell::StencilOp::KeepOGL: return GL_KEEP; case Maxwell::StencilOp::Zero: + case Maxwell::StencilOp::ZeroOGL: return GL_ZERO; case Maxwell::StencilOp::Replace: + case Maxwell::StencilOp::ReplaceOGL: return GL_REPLACE; case Maxwell::StencilOp::Incr: + case Maxwell::StencilOp::IncrOGL: return GL_INCR; case Maxwell::StencilOp::Decr: + case Maxwell::StencilOp::DecrOGL: return GL_DECR; case Maxwell::StencilOp::Invert: + case Maxwell::StencilOp::InvertOGL: return GL_INVERT; case 
Maxwell::StencilOp::IncrWrap: + case Maxwell::StencilOp::IncrWrapOGL: return GL_INCR_WRAP; case Maxwell::StencilOp::DecrWrap: + case Maxwell::StencilOp::DecrWrapOGL: return GL_DECR_WRAP; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil)); - UNREACHABLE(); - return {}; + LOG_ERROR(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil)); + return GL_KEEP; } inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) { @@ -363,9 +379,8 @@ inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) { case Maxwell::Cull::FrontFace::CounterClockWise: return GL_CCW; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); - UNREACHABLE(); - return {}; + LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); + return GL_CCW; } inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) { @@ -377,9 +392,8 @@ inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) { case Maxwell::Cull::CullFace::FrontAndBack: return GL_FRONT_AND_BACK; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); - UNREACHABLE(); - return {}; + LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); + return GL_BACK; } inline GLenum LogicOp(Maxwell::LogicOperation operation) { @@ -417,9 +431,8 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) { case Maxwell::LogicOperation::Set: return GL_SET; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation)); - UNREACHABLE(); - return {}; + LOG_ERROR(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation)); + return GL_COPY; } } // namespace MaxwellToGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 96d916b07..ea38da932 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ 
b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -115,7 +115,8 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window) RendererOpenGL::~RendererOpenGL() = default; /// Swap buffers (render frame) -void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) { +void RendererOpenGL::SwapBuffers( + std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { ScopeAcquireGLContext acquire_context{render_window}; Core::System::GetInstance().GetPerfStats().EndSystemFrame(); @@ -124,11 +125,11 @@ void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig& OpenGLState prev_state = OpenGLState::GetCurState(); state.Apply(); - if (framebuffer != boost::none) { + if (framebuffer) { // If framebuffer is provided, reload it from memory to a texture - if (screen_info.texture.width != (GLsizei)framebuffer->width || - screen_info.texture.height != (GLsizei)framebuffer->height || - screen_info.texture.pixel_format != framebuffer->pixel_format) { + if (screen_info.texture.width != (GLsizei)framebuffer->get().width || + screen_info.texture.height != (GLsizei)framebuffer->get().height || + screen_info.texture.pixel_format != framebuffer->get().pixel_format) { // Reallocate texture if the framebuffer size has changed. // This is expected to not happen very often and hence should not be a // performance problem. 
@@ -283,7 +284,8 @@ void RendererOpenGL::CreateRasterizer() { if (rasterizer) { return; } - + // Initialize sRGB Usage + OpenGLState::ClearsRGBUsed(); rasterizer = std::make_unique<RasterizerOpenGL>(render_window, screen_info); } @@ -356,13 +358,20 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, state.texture_units[0].texture = screen_info.display_texture; state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; + // Workaround brigthness problems in SMO by enabling sRGB in the final output + // if it has been used in the frame + // Needed because of this bug in QT + // QTBUG-50987 + state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); state.Apply(); - glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data()); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - + // restore default state + state.framebuffer_srgb.enabled = false; state.texture_units[0].texture = 0; state.Apply(); + // Clear sRGB state for the next frame + OpenGLState::ClearsRGBUsed(); } /** diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 961467a62..c0868c0e4 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -51,7 +51,8 @@ public: ~RendererOpenGL() override; /// Swap buffers (render frame) - void SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) override; + void SwapBuffers( + std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; /// Initialize the renderer bool Init() override; diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp new file mode 100644 index 000000000..d84634cb3 --- /dev/null +++ b/src/video_core/renderer_opengl/utils.cpp @@ -0,0 +1,38 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string> +#include <fmt/format.h> +#include <glad/glad.h> +#include "common/common_types.h" +#include "video_core/renderer_opengl/utils.h" + +namespace OpenGL { + +void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) { + if (!GLAD_GL_KHR_debug) { + return; // We don't need to throw an error as this is just for debugging + } + const std::string nice_addr = fmt::format("0x{:016x}", addr); + std::string object_label; + + if (extra_info.empty()) { + switch (identifier) { + case GL_TEXTURE: + object_label = "Texture@" + nice_addr; + break; + case GL_PROGRAM: + object_label = "Shader@" + nice_addr; + break; + default: + object_label = fmt::format("Object(0x{:x})@{}", identifier, nice_addr); + break; + } + } else { + object_label = extra_info + '@' + nice_addr; + } + glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str())); +} + +} // namespace OpenGL
\ No newline at end of file diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h new file mode 100644 index 000000000..1fcb6fc11 --- /dev/null +++ b/src/video_core/renderer_opengl/utils.h @@ -0,0 +1,15 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> +#include <glad/glad.h> +#include "common/common_types.h" + +namespace OpenGL { + +void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = ""); + +} // namespace OpenGL
\ No newline at end of file diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp new file mode 100644 index 000000000..9582dd2ca --- /dev/null +++ b/src/video_core/surface.cpp @@ -0,0 +1,490 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "common/math_util.h" +#include "video_core/surface.h" + +namespace VideoCore::Surface { + +SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type) { + switch (texture_type) { + case Tegra::Texture::TextureType::Texture1D: + return SurfaceTarget::Texture1D; + case Tegra::Texture::TextureType::Texture2D: + case Tegra::Texture::TextureType::Texture2DNoMipmap: + return SurfaceTarget::Texture2D; + case Tegra::Texture::TextureType::Texture3D: + return SurfaceTarget::Texture3D; + case Tegra::Texture::TextureType::TextureCubemap: + return SurfaceTarget::TextureCubemap; + case Tegra::Texture::TextureType::TextureCubeArray: + return SurfaceTarget::TextureCubeArray; + case Tegra::Texture::TextureType::Texture1DArray: + return SurfaceTarget::Texture1DArray; + case Tegra::Texture::TextureType::Texture2DArray: + return SurfaceTarget::Texture2DArray; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", static_cast<u32>(texture_type)); + UNREACHABLE(); + return SurfaceTarget::Texture2D; + } +} + +bool SurfaceTargetIsLayered(SurfaceTarget target) { + switch (target) { + case SurfaceTarget::Texture1D: + case SurfaceTarget::Texture2D: + case SurfaceTarget::Texture3D: + return false; + case SurfaceTarget::Texture1DArray: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubeArray: + return true; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); + UNREACHABLE(); + return false; + } +} + +PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { + switch 
(format) { + case Tegra::DepthFormat::S8_Z24_UNORM: + return PixelFormat::S8Z24; + case Tegra::DepthFormat::Z24_S8_UNORM: + return PixelFormat::Z24S8; + case Tegra::DepthFormat::Z32_FLOAT: + return PixelFormat::Z32F; + case Tegra::DepthFormat::Z16_UNORM: + return PixelFormat::Z16; + case Tegra::DepthFormat::Z32_S8_X24_FLOAT: + return PixelFormat::Z32FS8; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + UNREACHABLE(); + } +} + +PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { + switch (format) { + // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the + // gamma. + case Tegra::RenderTargetFormat::RGBA8_SRGB: + return PixelFormat::RGBA8_SRGB; + case Tegra::RenderTargetFormat::RGBA8_UNORM: + return PixelFormat::ABGR8U; + case Tegra::RenderTargetFormat::RGBA8_SNORM: + return PixelFormat::ABGR8S; + case Tegra::RenderTargetFormat::RGBA8_UINT: + return PixelFormat::ABGR8UI; + case Tegra::RenderTargetFormat::BGRA8_SRGB: + return PixelFormat::BGRA8_SRGB; + case Tegra::RenderTargetFormat::BGRA8_UNORM: + return PixelFormat::BGRA8; + case Tegra::RenderTargetFormat::RGB10_A2_UNORM: + return PixelFormat::A2B10G10R10U; + case Tegra::RenderTargetFormat::RGBA16_FLOAT: + return PixelFormat::RGBA16F; + case Tegra::RenderTargetFormat::RGBA16_UNORM: + return PixelFormat::RGBA16U; + case Tegra::RenderTargetFormat::RGBA16_UINT: + return PixelFormat::RGBA16UI; + case Tegra::RenderTargetFormat::RGBA32_FLOAT: + return PixelFormat::RGBA32F; + case Tegra::RenderTargetFormat::RG32_FLOAT: + return PixelFormat::RG32F; + case Tegra::RenderTargetFormat::R11G11B10_FLOAT: + return PixelFormat::R11FG11FB10F; + case Tegra::RenderTargetFormat::B5G6R5_UNORM: + return PixelFormat::B5G6R5U; + case Tegra::RenderTargetFormat::BGR5A1_UNORM: + return PixelFormat::A1B5G5R5U; + case Tegra::RenderTargetFormat::RGBA32_UINT: + return PixelFormat::RGBA32UI; + case Tegra::RenderTargetFormat::R8_UNORM: + 
return PixelFormat::R8U; + case Tegra::RenderTargetFormat::R8_UINT: + return PixelFormat::R8UI; + case Tegra::RenderTargetFormat::RG16_FLOAT: + return PixelFormat::RG16F; + case Tegra::RenderTargetFormat::RG16_UINT: + return PixelFormat::RG16UI; + case Tegra::RenderTargetFormat::RG16_SINT: + return PixelFormat::RG16I; + case Tegra::RenderTargetFormat::RG16_UNORM: + return PixelFormat::RG16; + case Tegra::RenderTargetFormat::RG16_SNORM: + return PixelFormat::RG16S; + case Tegra::RenderTargetFormat::RG8_UNORM: + return PixelFormat::RG8U; + case Tegra::RenderTargetFormat::RG8_SNORM: + return PixelFormat::RG8S; + case Tegra::RenderTargetFormat::R16_FLOAT: + return PixelFormat::R16F; + case Tegra::RenderTargetFormat::R16_UNORM: + return PixelFormat::R16U; + case Tegra::RenderTargetFormat::R16_SNORM: + return PixelFormat::R16S; + case Tegra::RenderTargetFormat::R16_UINT: + return PixelFormat::R16UI; + case Tegra::RenderTargetFormat::R16_SINT: + return PixelFormat::R16I; + case Tegra::RenderTargetFormat::R32_FLOAT: + return PixelFormat::R32F; + case Tegra::RenderTargetFormat::R32_UINT: + return PixelFormat::R32UI; + case Tegra::RenderTargetFormat::RG32_UINT: + return PixelFormat::RG32UI; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + UNREACHABLE(); + } +} + +PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, + Tegra::Texture::ComponentType component_type, + bool is_srgb) { + // TODO(Subv): Properly implement this + switch (format) { + case Tegra::Texture::TextureFormat::A8R8G8B8: + if (is_srgb) { + return PixelFormat::RGBA8_SRGB; + } + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::ABGR8U; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::ABGR8S; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::ABGR8UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); + UNREACHABLE(); + case 
Tegra::Texture::TextureFormat::B5G6R5: + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::B5G6R5U; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); + UNREACHABLE(); + case Tegra::Texture::TextureFormat::A2B10G10R10: + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::A2B10G10R10U; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); + UNREACHABLE(); + case Tegra::Texture::TextureFormat::A1B5G5R5: + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::A1B5G5R5U; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); + UNREACHABLE(); + case Tegra::Texture::TextureFormat::R8: + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::R8U; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::R8UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); + UNREACHABLE(); + case Tegra::Texture::TextureFormat::G8R8: + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::G8R8U; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::G8R8S; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); + UNREACHABLE(); + case Tegra::Texture::TextureFormat::R16_G16_B16_A16: + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::RGBA16U; + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::RGBA16F; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); + UNREACHABLE(); + case Tegra::Texture::TextureFormat::BF10GF11RF11: + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::R11FG11FB10F; + } + LOG_CRITICAL(HW_GPU, 
"Unimplemented component_type={}", static_cast<u32>(component_type)); + UNREACHABLE(); + case Tegra::Texture::TextureFormat::R32_G32_B32_A32: + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::RGBA32F; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::RGBA32UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); + UNREACHABLE(); + case Tegra::Texture::TextureFormat::R32_G32: + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::RG32F; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::RG32UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); + UNREACHABLE(); + case Tegra::Texture::TextureFormat::R32_G32_B32: + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::RGB32F; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); + UNREACHABLE(); + case Tegra::Texture::TextureFormat::R16: + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::R16F; + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::R16U; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::R16S; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::R16UI; + case Tegra::Texture::ComponentType::SINT: + return PixelFormat::R16I; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); + UNREACHABLE(); + case Tegra::Texture::TextureFormat::R32: + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::R32F; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::R32UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); + UNREACHABLE(); + case Tegra::Texture::TextureFormat::ZF32: + return PixelFormat::Z32F; + case 
Tegra::Texture::TextureFormat::Z16: + return PixelFormat::Z16; + case Tegra::Texture::TextureFormat::Z24S8: + return PixelFormat::Z24S8; + case Tegra::Texture::TextureFormat::DXT1: + return is_srgb ? PixelFormat::DXT1_SRGB : PixelFormat::DXT1; + case Tegra::Texture::TextureFormat::DXT23: + return is_srgb ? PixelFormat::DXT23_SRGB : PixelFormat::DXT23; + case Tegra::Texture::TextureFormat::DXT45: + return is_srgb ? PixelFormat::DXT45_SRGB : PixelFormat::DXT45; + case Tegra::Texture::TextureFormat::DXN1: + return PixelFormat::DXN1; + case Tegra::Texture::TextureFormat::DXN2: + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::DXN2UNORM; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::DXN2SNORM; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); + UNREACHABLE(); + case Tegra::Texture::TextureFormat::BC7U: + return is_srgb ? PixelFormat::BC7U_SRGB : PixelFormat::BC7U; + case Tegra::Texture::TextureFormat::BC6H_UF16: + return PixelFormat::BC6H_UF16; + case Tegra::Texture::TextureFormat::BC6H_SF16: + return PixelFormat::BC6H_SF16; + case Tegra::Texture::TextureFormat::ASTC_2D_4X4: + return is_srgb ? PixelFormat::ASTC_2D_4X4_SRGB : PixelFormat::ASTC_2D_4X4; + case Tegra::Texture::TextureFormat::ASTC_2D_5X4: + return is_srgb ? PixelFormat::ASTC_2D_5X4_SRGB : PixelFormat::ASTC_2D_5X4; + case Tegra::Texture::TextureFormat::ASTC_2D_5X5: + return is_srgb ? PixelFormat::ASTC_2D_5X5_SRGB : PixelFormat::ASTC_2D_5X5; + case Tegra::Texture::TextureFormat::ASTC_2D_8X8: + return is_srgb ? PixelFormat::ASTC_2D_8X8_SRGB : PixelFormat::ASTC_2D_8X8; + case Tegra::Texture::TextureFormat::ASTC_2D_8X5: + return is_srgb ? PixelFormat::ASTC_2D_8X5_SRGB : PixelFormat::ASTC_2D_8X5; + case Tegra::Texture::TextureFormat::ASTC_2D_10X8: + return is_srgb ? 
PixelFormat::ASTC_2D_10X8_SRGB : PixelFormat::ASTC_2D_10X8; + case Tegra::Texture::TextureFormat::R16_G16: + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::RG16F; + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::RG16; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::RG16S; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::RG16UI; + case Tegra::Texture::ComponentType::SINT: + return PixelFormat::RG16I; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); + UNREACHABLE(); + default: + LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format), + static_cast<u32>(component_type)); + UNREACHABLE(); + } +} + +ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) { + // TODO(Subv): Implement more component types + switch (type) { + case Tegra::Texture::ComponentType::UNORM: + return ComponentType::UNorm; + case Tegra::Texture::ComponentType::FLOAT: + return ComponentType::Float; + case Tegra::Texture::ComponentType::SNORM: + return ComponentType::SNorm; + case Tegra::Texture::ComponentType::UINT: + return ComponentType::UInt; + case Tegra::Texture::ComponentType::SINT: + return ComponentType::SInt; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type)); + UNREACHABLE(); + } +} + +ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) { + // TODO(Subv): Implement more render targets + switch (format) { + case Tegra::RenderTargetFormat::RGBA8_UNORM: + case Tegra::RenderTargetFormat::RGBA8_SRGB: + case Tegra::RenderTargetFormat::BGRA8_UNORM: + case Tegra::RenderTargetFormat::BGRA8_SRGB: + case Tegra::RenderTargetFormat::RGB10_A2_UNORM: + case Tegra::RenderTargetFormat::R8_UNORM: + case Tegra::RenderTargetFormat::RG16_UNORM: + case Tegra::RenderTargetFormat::R16_UNORM: + case Tegra::RenderTargetFormat::B5G6R5_UNORM: + case 
Tegra::RenderTargetFormat::BGR5A1_UNORM: + case Tegra::RenderTargetFormat::RG8_UNORM: + case Tegra::RenderTargetFormat::RGBA16_UNORM: + return ComponentType::UNorm; + case Tegra::RenderTargetFormat::RGBA8_SNORM: + case Tegra::RenderTargetFormat::RG16_SNORM: + case Tegra::RenderTargetFormat::R16_SNORM: + case Tegra::RenderTargetFormat::RG8_SNORM: + return ComponentType::SNorm; + case Tegra::RenderTargetFormat::RGBA16_FLOAT: + case Tegra::RenderTargetFormat::R11G11B10_FLOAT: + case Tegra::RenderTargetFormat::RGBA32_FLOAT: + case Tegra::RenderTargetFormat::RG32_FLOAT: + case Tegra::RenderTargetFormat::RG16_FLOAT: + case Tegra::RenderTargetFormat::R16_FLOAT: + case Tegra::RenderTargetFormat::R32_FLOAT: + return ComponentType::Float; + case Tegra::RenderTargetFormat::RGBA32_UINT: + case Tegra::RenderTargetFormat::RGBA16_UINT: + case Tegra::RenderTargetFormat::RG16_UINT: + case Tegra::RenderTargetFormat::R8_UINT: + case Tegra::RenderTargetFormat::R16_UINT: + case Tegra::RenderTargetFormat::RG32_UINT: + case Tegra::RenderTargetFormat::R32_UINT: + case Tegra::RenderTargetFormat::RGBA8_UINT: + return ComponentType::UInt; + case Tegra::RenderTargetFormat::RG16_SINT: + case Tegra::RenderTargetFormat::R16_SINT: + return ComponentType::SInt; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + UNREACHABLE(); + } +} + +PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { + switch (format) { + case Tegra::FramebufferConfig::PixelFormat::ABGR8: + return PixelFormat::ABGR8U; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + UNREACHABLE(); + } +} + +ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) { + switch (format) { + case Tegra::DepthFormat::Z16_UNORM: + case Tegra::DepthFormat::S8_Z24_UNORM: + case Tegra::DepthFormat::Z24_S8_UNORM: + return ComponentType::UNorm; + case Tegra::DepthFormat::Z32_FLOAT: + case Tegra::DepthFormat::Z32_S8_X24_FLOAT: + 
return ComponentType::Float; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + UNREACHABLE(); + } +} + +SurfaceType GetFormatType(PixelFormat pixel_format) { + if (static_cast<std::size_t>(pixel_format) < + static_cast<std::size_t>(PixelFormat::MaxColorFormat)) { + return SurfaceType::ColorTexture; + } + + if (static_cast<std::size_t>(pixel_format) < + static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) { + return SurfaceType::Depth; + } + + if (static_cast<std::size_t>(pixel_format) < + static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) { + return SurfaceType::DepthStencil; + } + + // TODO(Subv): Implement the other formats + ASSERT(false); + + return SurfaceType::Invalid; +} + +bool IsPixelFormatASTC(PixelFormat format) { + switch (format) { + case PixelFormat::ASTC_2D_4X4: + case PixelFormat::ASTC_2D_5X4: + case PixelFormat::ASTC_2D_5X5: + case PixelFormat::ASTC_2D_8X8: + case PixelFormat::ASTC_2D_8X5: + case PixelFormat::ASTC_2D_4X4_SRGB: + case PixelFormat::ASTC_2D_5X4_SRGB: + case PixelFormat::ASTC_2D_5X5_SRGB: + case PixelFormat::ASTC_2D_8X8_SRGB: + case PixelFormat::ASTC_2D_8X5_SRGB: + case PixelFormat::ASTC_2D_10X8: + case PixelFormat::ASTC_2D_10X8_SRGB: + return true; + default: + return false; + } +} + +std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { + return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; +} + +bool IsFormatBCn(PixelFormat format) { + switch (format) { + case PixelFormat::DXT1: + case PixelFormat::DXT23: + case PixelFormat::DXT45: + case PixelFormat::DXN1: + case PixelFormat::DXN2SNORM: + case PixelFormat::DXN2UNORM: + case PixelFormat::BC7U: + case PixelFormat::BC6H_UF16: + case PixelFormat::BC6H_SF16: + case PixelFormat::DXT1_SRGB: + case PixelFormat::DXT23_SRGB: + case PixelFormat::DXT45_SRGB: + case PixelFormat::BC7U_SRGB: + return true; + } + return false; +} + +} // namespace VideoCore::Surface diff --git a/src/video_core/surface.h 
b/src/video_core/surface.h new file mode 100644 index 000000000..0dd3eb2e4 --- /dev/null +++ b/src/video_core/surface.h @@ -0,0 +1,477 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <climits> +#include <utility> +#include "common/assert.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "video_core/gpu.h" +#include "video_core/textures/texture.h" + +namespace VideoCore::Surface { + +enum class PixelFormat { + ABGR8U = 0, + ABGR8S = 1, + ABGR8UI = 2, + B5G6R5U = 3, + A2B10G10R10U = 4, + A1B5G5R5U = 5, + R8U = 6, + R8UI = 7, + RGBA16F = 8, + RGBA16U = 9, + RGBA16UI = 10, + R11FG11FB10F = 11, + RGBA32UI = 12, + DXT1 = 13, + DXT23 = 14, + DXT45 = 15, + DXN1 = 16, // This is also known as BC4 + DXN2UNORM = 17, + DXN2SNORM = 18, + BC7U = 19, + BC6H_UF16 = 20, + BC6H_SF16 = 21, + ASTC_2D_4X4 = 22, + G8R8U = 23, + G8R8S = 24, + BGRA8 = 25, + RGBA32F = 26, + RG32F = 27, + R32F = 28, + R16F = 29, + R16U = 30, + R16S = 31, + R16UI = 32, + R16I = 33, + RG16 = 34, + RG16F = 35, + RG16UI = 36, + RG16I = 37, + RG16S = 38, + RGB32F = 39, + RGBA8_SRGB = 40, + RG8U = 41, + RG8S = 42, + RG32UI = 43, + R32UI = 44, + ASTC_2D_8X8 = 45, + ASTC_2D_8X5 = 46, + ASTC_2D_5X4 = 47, + BGRA8_SRGB = 48, + DXT1_SRGB = 49, + DXT23_SRGB = 50, + DXT45_SRGB = 51, + BC7U_SRGB = 52, + ASTC_2D_4X4_SRGB = 53, + ASTC_2D_8X8_SRGB = 54, + ASTC_2D_8X5_SRGB = 55, + ASTC_2D_5X4_SRGB = 56, + ASTC_2D_5X5 = 57, + ASTC_2D_5X5_SRGB = 58, + ASTC_2D_10X8 = 59, + ASTC_2D_10X8_SRGB = 60, + + MaxColorFormat, + + // Depth formats + Z32F = 61, + Z16 = 62, + + MaxDepthFormat, + + // DepthStencil formats + Z24S8 = 63, + S8Z24 = 64, + Z32FS8 = 65, + + MaxDepthStencilFormat, + + Max = MaxDepthStencilFormat, + Invalid = 255, +}; + +static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max); + +enum class ComponentType { + Invalid = 0, + SNorm = 1, + UNorm 
= 2, + SInt = 3, + UInt = 4, + Float = 5, +}; + +enum class SurfaceType { + ColorTexture = 0, + Depth = 1, + DepthStencil = 2, + Fill = 3, + Invalid = 4, +}; + +enum class SurfaceTarget { + Texture1D, + Texture2D, + Texture3D, + Texture1DArray, + Texture2DArray, + TextureCubemap, + TextureCubeArray, +}; + +/** + * Gets the compression factor for the specified PixelFormat. This applies to just the + * "compressed width" and "compressed height", not the overall compression factor of a + * compressed image. This is used for maintaining proper surface sizes for compressed + * texture formats. + */ +static constexpr u32 GetCompressionFactor(PixelFormat format) { + if (format == PixelFormat::Invalid) + return 0; + + constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ + 1, // ABGR8U + 1, // ABGR8S + 1, // ABGR8UI + 1, // B5G6R5U + 1, // A2B10G10R10U + 1, // A1B5G5R5U + 1, // R8U + 1, // R8UI + 1, // RGBA16F + 1, // RGBA16U + 1, // RGBA16UI + 1, // R11FG11FB10F + 1, // RGBA32UI + 4, // DXT1 + 4, // DXT23 + 4, // DXT45 + 4, // DXN1 + 4, // DXN2UNORM + 4, // DXN2SNORM + 4, // BC7U + 4, // BC6H_UF16 + 4, // BC6H_SF16 + 4, // ASTC_2D_4X4 + 1, // G8R8U + 1, // G8R8S + 1, // BGRA8 + 1, // RGBA32F + 1, // RG32F + 1, // R32F + 1, // R16F + 1, // R16U + 1, // R16S + 1, // R16UI + 1, // R16I + 1, // RG16 + 1, // RG16F + 1, // RG16UI + 1, // RG16I + 1, // RG16S + 1, // RGB32F + 1, // RGBA8_SRGB + 1, // RG8U + 1, // RG8S + 1, // RG32UI + 1, // R32UI + 4, // ASTC_2D_8X8 + 4, // ASTC_2D_8X5 + 4, // ASTC_2D_5X4 + 1, // BGRA8_SRGB + 4, // DXT1_SRGB + 4, // DXT23_SRGB + 4, // DXT45_SRGB + 4, // BC7U_SRGB + 4, // ASTC_2D_4X4_SRGB + 4, // ASTC_2D_8X8_SRGB + 4, // ASTC_2D_8X5_SRGB + 4, // ASTC_2D_5X4_SRGB + 4, // ASTC_2D_5X5 + 4, // ASTC_2D_5X5_SRGB + 4, // ASTC_2D_10X8 + 4, // ASTC_2D_10X8_SRGB + 1, // Z32F + 1, // Z16 + 1, // Z24S8 + 1, // S8Z24 + 1, // Z32FS8 + }}; + + ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size()); + return 
compression_factor_table[static_cast<std::size_t>(format)]; +} + +static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { + if (format == PixelFormat::Invalid) + return 0; + constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ + 1, // ABGR8U + 1, // ABGR8S + 1, // ABGR8UI + 1, // B5G6R5U + 1, // A2B10G10R10U + 1, // A1B5G5R5U + 1, // R8U + 1, // R8UI + 1, // RGBA16F + 1, // RGBA16U + 1, // RGBA16UI + 1, // R11FG11FB10F + 1, // RGBA32UI + 4, // DXT1 + 4, // DXT23 + 4, // DXT45 + 4, // DXN1 + 4, // DXN2UNORM + 4, // DXN2SNORM + 4, // BC7U + 4, // BC6H_UF16 + 4, // BC6H_SF16 + 4, // ASTC_2D_4X4 + 1, // G8R8U + 1, // G8R8S + 1, // BGRA8 + 1, // RGBA32F + 1, // RG32F + 1, // R32F + 1, // R16F + 1, // R16U + 1, // R16S + 1, // R16UI + 1, // R16I + 1, // RG16 + 1, // RG16F + 1, // RG16UI + 1, // RG16I + 1, // RG16S + 1, // RGB32F + 1, // RGBA8_SRGB + 1, // RG8U + 1, // RG8S + 1, // RG32UI + 1, // R32UI + 8, // ASTC_2D_8X8 + 8, // ASTC_2D_8X5 + 5, // ASTC_2D_5X4 + 1, // BGRA8_SRGB + 4, // DXT1_SRGB + 4, // DXT23_SRGB + 4, // DXT45_SRGB + 4, // BC7U_SRGB + 4, // ASTC_2D_4X4_SRGB + 8, // ASTC_2D_8X8_SRGB + 8, // ASTC_2D_8X5_SRGB + 5, // ASTC_2D_5X4_SRGB + 5, // ASTC_2D_5X5 + 5, // ASTC_2D_5X5_SRGB + 10, // ASTC_2D_10X8 + 10, // ASTC_2D_10X8_SRGB + 1, // Z32F + 1, // Z16 + 1, // Z24S8 + 1, // S8Z24 + 1, // Z32FS8 + }}; + ASSERT(static_cast<std::size_t>(format) < block_width_table.size()); + return block_width_table[static_cast<std::size_t>(format)]; +} + +static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { + if (format == PixelFormat::Invalid) + return 0; + + constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ + 1, // ABGR8U + 1, // ABGR8S + 1, // ABGR8UI + 1, // B5G6R5U + 1, // A2B10G10R10U + 1, // A1B5G5R5U + 1, // R8U + 1, // R8UI + 1, // RGBA16F + 1, // RGBA16U + 1, // RGBA16UI + 1, // R11FG11FB10F + 1, // RGBA32UI + 4, // DXT1 + 4, // DXT23 + 4, // DXT45 + 4, // DXN1 + 4, // DXN2UNORM + 4, // DXN2SNORM + 4, // BC7U + 4, // 
BC6H_UF16 + 4, // BC6H_SF16 + 4, // ASTC_2D_4X4 + 1, // G8R8U + 1, // G8R8S + 1, // BGRA8 + 1, // RGBA32F + 1, // RG32F + 1, // R32F + 1, // R16F + 1, // R16U + 1, // R16S + 1, // R16UI + 1, // R16I + 1, // RG16 + 1, // RG16F + 1, // RG16UI + 1, // RG16I + 1, // RG16S + 1, // RGB32F + 1, // RGBA8_SRGB + 1, // RG8U + 1, // RG8S + 1, // RG32UI + 1, // R32UI + 8, // ASTC_2D_8X8 + 5, // ASTC_2D_8X5 + 4, // ASTC_2D_5X4 + 1, // BGRA8_SRGB + 4, // DXT1_SRGB + 4, // DXT23_SRGB + 4, // DXT45_SRGB + 4, // BC7U_SRGB + 4, // ASTC_2D_4X4_SRGB + 8, // ASTC_2D_8X8_SRGB + 5, // ASTC_2D_8X5_SRGB + 4, // ASTC_2D_5X4_SRGB + 5, // ASTC_2D_5X5 + 5, // ASTC_2D_5X5_SRGB + 8, // ASTC_2D_10X8 + 8, // ASTC_2D_10X8_SRGB + 1, // Z32F + 1, // Z16 + 1, // Z24S8 + 1, // S8Z24 + 1, // Z32FS8 + }}; + + ASSERT(static_cast<std::size_t>(format) < block_height_table.size()); + return block_height_table[static_cast<std::size_t>(format)]; +} + +static constexpr u32 GetFormatBpp(PixelFormat format) { + if (format == PixelFormat::Invalid) + return 0; + + constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ + 32, // ABGR8U + 32, // ABGR8S + 32, // ABGR8UI + 16, // B5G6R5U + 32, // A2B10G10R10U + 16, // A1B5G5R5U + 8, // R8U + 8, // R8UI + 64, // RGBA16F + 64, // RGBA16U + 64, // RGBA16UI + 32, // R11FG11FB10F + 128, // RGBA32UI + 64, // DXT1 + 128, // DXT23 + 128, // DXT45 + 64, // DXN1 + 128, // DXN2UNORM + 128, // DXN2SNORM + 128, // BC7U + 128, // BC6H_UF16 + 128, // BC6H_SF16 + 128, // ASTC_2D_4X4 + 16, // G8R8U + 16, // G8R8S + 32, // BGRA8 + 128, // RGBA32F + 64, // RG32F + 32, // R32F + 16, // R16F + 16, // R16U + 16, // R16S + 16, // R16UI + 16, // R16I + 32, // RG16 + 32, // RG16F + 32, // RG16UI + 32, // RG16I + 32, // RG16S + 96, // RGB32F + 32, // RGBA8_SRGB + 16, // RG8U + 16, // RG8S + 64, // RG32UI + 32, // R32UI + 128, // ASTC_2D_8X8 + 128, // ASTC_2D_8X5 + 128, // ASTC_2D_5X4 + 32, // BGRA8_SRGB + 64, // DXT1_SRGB + 128, // DXT23_SRGB + 128, // DXT45_SRGB + 128, // BC7U + 128, // 
ASTC_2D_4X4_SRGB + 128, // ASTC_2D_8X8_SRGB + 128, // ASTC_2D_8X5_SRGB + 128, // ASTC_2D_5X4_SRGB + 128, // ASTC_2D_5X5 + 128, // ASTC_2D_5X5_SRGB + 128, // ASTC_2D_10X8 + 128, // ASTC_2D_10X8_SRGB + 32, // Z32F + 16, // Z16 + 32, // Z24S8 + 32, // S8Z24 + 64, // Z32FS8 + }}; + + ASSERT(static_cast<std::size_t>(format) < bpp_table.size()); + return bpp_table[static_cast<std::size_t>(format)]; +} + +/// Returns the sizer in bytes of the specified pixel format +static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { + if (pixel_format == PixelFormat::Invalid) { + return 0; + } + return GetFormatBpp(pixel_format) / CHAR_BIT; +} + +SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); + +bool SurfaceTargetIsLayered(SurfaceTarget target); + +PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format); + +PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format); + +PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, + Tegra::Texture::ComponentType component_type, + bool is_srgb); + +ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type); + +ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format); + +PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format); + +ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format); + +SurfaceType GetFormatType(PixelFormat pixel_format); + +bool IsPixelFormatASTC(PixelFormat format); + +std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); + +/// Returns true if the specified PixelFormat is a BCn format, e.g. 
DXT or DXN +bool IsFormatBCn(PixelFormat format); + +} // namespace VideoCore::Surface diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index b1feacae9..bc50a4876 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -1598,27 +1598,29 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, namespace Tegra::Texture::ASTC { std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, - uint32_t block_width, uint32_t block_height) { + uint32_t depth, uint32_t block_width, uint32_t block_height) { uint32_t blockIdx = 0; - std::vector<uint8_t> outData(height * width * 4); - for (uint32_t j = 0; j < height; j += block_height) { - for (uint32_t i = 0; i < width; i += block_width) { + std::vector<uint8_t> outData(height * width * depth * 4); + for (uint32_t k = 0; k < depth; k++) { + for (uint32_t j = 0; j < height; j += block_height) { + for (uint32_t i = 0; i < width; i += block_width) { - uint8_t* blockPtr = data.data() + blockIdx * 16; + uint8_t* blockPtr = data.data() + blockIdx * 16; - // Blocks can be at most 12x12 - uint32_t uncompData[144]; - ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); + // Blocks can be at most 12x12 + uint32_t uncompData[144]; + ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); - uint32_t decompWidth = std::min(block_width, width - i); - uint32_t decompHeight = std::min(block_height, height - j); + uint32_t decompWidth = std::min(block_width, width - i); + uint32_t decompHeight = std::min(block_height, height - j); - uint8_t* outRow = outData.data() + (j * width + i) * 4; - for (uint32_t jj = 0; jj < decompHeight; jj++) { - memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); - } + uint8_t* outRow = outData.data() + (j * width + i) * 4; + for (uint32_t jj = 0; jj < decompHeight; jj++) { + memcpy(outRow + jj * width * 4, uncompData + jj * block_width, 
decompWidth * 4); + } - blockIdx++; + blockIdx++; + } } } diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index f0d7c0e56..d419dd025 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -10,6 +10,6 @@ namespace Tegra::Texture::ASTC { std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, - uint32_t block_width, uint32_t block_height); + uint32_t depth, uint32_t block_width, uint32_t block_height); } // namespace Tegra::Texture::ASTC diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index f1b40e7f5..c9160b467 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -45,7 +45,7 @@ constexpr auto fast_swizzle_table = SwizzleTable<8, 4, 16>(); * Instead of going gob by gob, we map the coordinates inside a block and manage from * those. Block_Width is assumed to be 1. */ -void PreciseProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, +void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, const u32 y_end, const u32 z_end, const u32 tile_offset, const u32 xy_block_size, const u32 layer_z, const u32 stride_x, @@ -81,7 +81,7 @@ void PreciseProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unsw * Instead of going gob by gob, we map the coordinates inside a block and manage from * those. Block_Width is assumed to be 1. 
*/ -void FastProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, +void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, const u32 y_end, const u32 z_end, const u32 tile_offset, const u32 xy_block_size, const u32 layer_z, const u32 stride_x, @@ -90,10 +90,10 @@ void FastProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizz u32 z_address = tile_offset; const u32 x_startb = x_start * bytes_per_pixel; const u32 x_endb = x_end * bytes_per_pixel; - const u32 copy_size = 16; - const u32 gob_size_x = 64; - const u32 gob_size_y = 8; - const u32 gob_size_z = 1; + constexpr u32 copy_size = 16; + constexpr u32 gob_size_x = 64; + constexpr u32 gob_size_y = 8; + constexpr u32 gob_size_z = 1; const u32 gob_size = gob_size_x * gob_size_y * gob_size_z; for (u32 z = z_start; z < z_end; z++) { u32 y_address = z_address; @@ -126,24 +126,23 @@ void FastProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizz * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces */ template <bool fast> -void SwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, const u32 width, - const u32 height, const u32 depth, const u32 bytes_per_pixel, +void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, + const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel, const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth) { auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; const u32 stride_x = width * out_bytes_per_pixel; const u32 layer_z = height * stride_x; - const u32 gob_x_bytes = 64; + constexpr u32 gob_x_bytes = 64; const u32 gob_elements_x = gob_x_bytes / bytes_per_pixel; - const u32 gob_elements_y = 8; - const u32 gob_elements_z = 1; + constexpr u32 gob_elements_y = 8; + 
constexpr u32 gob_elements_z = 1; const u32 block_x_elements = gob_elements_x; const u32 block_y_elements = gob_elements_y * block_height; const u32 block_z_elements = gob_elements_z * block_depth; const u32 blocks_on_x = div_ceil(width, block_x_elements); const u32 blocks_on_y = div_ceil(height, block_y_elements); const u32 blocks_on_z = div_ceil(depth, block_z_elements); - const u32 blocks = blocks_on_x * blocks_on_y * blocks_on_z; - const u32 gob_size = gob_x_bytes * gob_elements_y * gob_elements_z; + constexpr u32 gob_size = gob_x_bytes * gob_elements_y * gob_elements_z; const u32 xy_block_size = gob_size * block_height; const u32 block_size = xy_block_size * block_depth; u32 tile_offset = 0; @@ -172,7 +171,7 @@ void SwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, } void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, - u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, + u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, bool unswizzle, u32 block_height, u32 block_depth) { if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) { SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, @@ -203,6 +202,8 @@ u32 BytesPerPixel(TextureFormat format) { case TextureFormat::ASTC_2D_5X4: case TextureFormat::ASTC_2D_8X8: case TextureFormat::ASTC_2D_8X5: + case TextureFormat::ASTC_2D_10X8: + case TextureFormat::ASTC_2D_5X5: case TextureFormat::A8R8G8B8: case TextureFormat::A2B10G10R10: case TextureFormat::BF10GF11RF11: @@ -228,12 +229,21 @@ u32 BytesPerPixel(TextureFormat format) { } } -std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, - u32 height, u32 depth, u32 block_height, u32 block_depth) { +void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, + u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, + u32 block_depth) { + 
CopySwizzledData((width + tile_size_x - 1) / tile_size_x, + (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, + bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true, + block_height, block_depth); +} + +std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, + u32 bytes_per_pixel, u32 width, u32 height, u32 depth, + u32 block_height, u32 block_depth) { std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); - CopySwizzledData(width / tile_size, height / tile_size, depth, bytes_per_pixel, bytes_per_pixel, - Memory::GetPointer(address), unswizzled_data.data(), true, block_height, - block_depth); + UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, + width, height, depth, block_height, block_depth); return unswizzled_data; } @@ -293,6 +303,8 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat case TextureFormat::BC6H_SF16: case TextureFormat::ASTC_2D_4X4: case TextureFormat::ASTC_2D_8X8: + case TextureFormat::ASTC_2D_5X5: + case TextureFormat::ASTC_2D_10X8: case TextureFormat::A8R8G8B8: case TextureFormat::A2B10G10R10: case TextureFormat::A1B5G5R5: @@ -320,13 +332,13 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth) { if (tiled) { - const u32 gobs_in_x = 64 / bytes_per_pixel; - const u32 gobs_in_y = 8; - const u32 gobs_in_z = 1; - const u32 aligned_width = Common::AlignUp(width, gobs_in_x); + constexpr u32 gobs_in_x = 64; + constexpr u32 gobs_in_y = 8; + constexpr u32 gobs_in_z = 1; + const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gobs_in_x); const u32 aligned_height = Common::AlignUp(height, gobs_in_y * block_height); const u32 aligned_depth = Common::AlignUp(depth, gobs_in_z * block_depth); - return aligned_width * aligned_height * 
aligned_depth * bytes_per_pixel; + return aligned_width * aligned_height * aligned_depth; } else { return width * height * depth * bytes_per_pixel; } diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 4726f54a5..f4ef7c73e 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -10,11 +10,24 @@ namespace Tegra::Texture { +// GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents +// an small rect of (64/bytes_per_pixel)X8. +inline std::size_t GetGOBSize() { + return 512; +} + +/** + * Unswizzles a swizzled texture without changing its format. + */ +void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, + u32 bytes_per_pixel, u32 width, u32 height, u32 depth, + u32 block_height = TICEntry::DefaultBlockHeight, + u32 block_depth = TICEntry::DefaultBlockHeight); /** * Unswizzles a swizzled texture without changing its format. */ -std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, - u32 height, u32 depth, +std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, + u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height = TICEntry::DefaultBlockHeight, u32 block_depth = TICEntry::DefaultBlockHeight); diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 5947bd2b9..e199d019a 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -168,19 +168,29 @@ struct TICEntry { // High 16 bits of the pitch value BitField<0, 16, u32> pitch_high; - + BitField<26, 1, u32> use_header_opt_control; + BitField<27, 1, u32> depth_texture; BitField<28, 4, u32> max_mip_level; }; union { BitField<0, 16, u32> width_minus_1; + BitField<22, 1, u32> srgb_conversion; BitField<23, 4, TextureType> texture_type; + BitField<29, 3, u32> border_size; }; union { BitField<0, 16, u32> height_minus_1; BitField<16, 15, u32> 
depth_minus_1; }; + union { + BitField<6, 13, u32> mip_lod_bias; + BitField<27, 3, u32> max_anisotropy; + }; - INSERT_PADDING_BYTES(8); + union { + BitField<0, 4, u32> res_min_mip_level; + BitField<4, 4, u32> res_max_mip_level; + }; GPUVAddr Address() const { return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); @@ -227,6 +237,10 @@ struct TICEntry { return header_version == TICHeaderVersion::BlockLinear || header_version == TICHeaderVersion::BlockLinearColorKey; } + + bool IsSrgbConversionEnabled() const { + return srgb_conversion != 0; + } }; static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); diff --git a/src/video_core/utils.h b/src/video_core/utils.h index 237cc1307..e0a14d48f 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h @@ -161,30 +161,4 @@ static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixe } } -static void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, - std::string extra_info = "") { - if (!GLAD_GL_KHR_debug) { - return; // We don't need to throw an error as this is just for debugging - } - const std::string nice_addr = fmt::format("0x{:016x}", addr); - std::string object_label; - - if (extra_info.empty()) { - switch (identifier) { - case GL_TEXTURE: - object_label = "Texture@" + nice_addr; - break; - case GL_PROGRAM: - object_label = "Shader@" + nice_addr; - break; - default: - object_label = fmt::format("Object(0x{:x})@{}", identifier, nice_addr); - break; - } - } else { - object_label = extra_info + '@' + nice_addr; - } - glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str())); -} - } // namespace VideoCore |