summary refs log tree commit diff stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/CMakeLists.txt 3
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 37
-rw-r--r-- src/video_core/engines/maxwell_3d.h 118
-rw-r--r-- src/video_core/engines/shader_bytecode.h 439
-rw-r--r-- src/video_core/gpu.h 3
-rw-r--r-- src/video_core/rasterizer_interface.h 2
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 406
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 88
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 191
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 122
-rw-r--r-- src/video_core/renderer_opengl/gl_resource_manager.h 139
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 791
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.h 25
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.cpp 68
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.h 119
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.cpp 64
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.h 175
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_util.cpp 169
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_util.h 56
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp 18
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h 6
-rw-r--r-- src/video_core/renderer_opengl/gl_stream_buffer.h 2
-rw-r--r-- src/video_core/renderer_opengl/maxwell_to_gl.h 108
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp 15
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.h 2
-rw-r--r-- src/video_core/textures/decoders.cpp 23
-rw-r--r-- src/video_core/textures/decoders.h 3
-rw-r--r-- src/video_core/textures/texture.h 29
-rw-r--r-- src/video_core/utils.h 2
29 files changed, 2545 insertions, 678 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index a710c4bc5..281810357 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -9,6 +9,7 @@ add_library(video_core STATIC
engines/maxwell_3d.h
engines/maxwell_compute.cpp
engines/maxwell_compute.h
+ engines/shader_bytecode.h
gpu.cpp
gpu.h
macro_interpreter.cpp
@@ -27,6 +28,8 @@ add_library(video_core STATIC
renderer_opengl/gl_shader_decompiler.h
renderer_opengl/gl_shader_gen.cpp
renderer_opengl/gl_shader_gen.h
+ renderer_opengl/gl_shader_manager.cpp
+ renderer_opengl/gl_shader_manager.h
renderer_opengl/gl_shader_util.cpp
renderer_opengl/gl_shader_util.h
renderer_opengl/gl_state.cpp
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2d7c3152f..2a3ff234a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -74,8 +74,6 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
regs.reg_array[method] = value;
-#define MAXWELL3D_REG_INDEX(field_name) (offsetof(Regs, field_name) / sizeof(u32))
-
switch (method) {
case MAXWELL3D_REG_INDEX(code_address.code_address_high):
case MAXWELL3D_REG_INDEX(code_address.code_address_low): {
@@ -136,7 +134,7 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
break;
}
-#undef MAXWELL3D_REG_INDEX
+ VideoCore::g_renderer->Rasterizer()->NotifyMaxwellRegisterChanged(method);
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
@@ -165,6 +163,7 @@ void Maxwell3D::ProcessQueryGet() {
void Maxwell3D::DrawArrays() {
LOG_DEBUG(HW_GPU, "called, topology=%d, count=%d", regs.draw.topology.Value(),
regs.vertex_buffer.count);
+ ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
@@ -176,7 +175,8 @@ void Maxwell3D::DrawArrays() {
debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
}
- VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(false /*is_indexed*/);
+ const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
+ VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(is_indexed);
}
void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
@@ -218,10 +218,12 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
Texture::TICEntry tic_entry;
Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
- ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear,
- "TIC versions other than BlockLinear are unimplemented");
+ ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
+ tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
+ "TIC versions other than BlockLinear or Pitch are unimplemented");
- ASSERT_MSG(tic_entry.texture_type == Texture::TextureType::Texture2D,
+ ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) ||
+ (tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap),
"Texture types other than Texture2D are unimplemented");
auto r_type = tic_entry.r_type.Value();
@@ -301,5 +303,26 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
return regs.reg_array[method];
}
+/// Reports whether the given shader stage is currently enabled in the register state.
+bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const {
+    // Reads the enable bit of the shader program slot that backs a stage.
+    const auto program_enabled = [this](Regs::ShaderProgram program) {
+        return regs.shader_config[static_cast<size_t>(program)].enable != 0;
+    };
+
+    switch (stage) {
+    case Regs::ShaderStage::Vertex:
+        // The Vertex stage is always enabled.
+        return true;
+    case Regs::ShaderStage::TesselationControl:
+        return program_enabled(Regs::ShaderProgram::TesselationControl);
+    case Regs::ShaderStage::TesselationEval:
+        return program_enabled(Regs::ShaderProgram::TesselationEval);
+    case Regs::ShaderStage::Geometry:
+        return program_enabled(Regs::ShaderProgram::Geometry);
+    case Regs::ShaderStage::Fragment:
+        return program_enabled(Regs::ShaderProgram::Fragment);
+    }
+
+    // Every known stage returned above; any other value is a programming error.
+    UNREACHABLE();
+}
+
} // namespace Engines
} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 98b39b2ff..d4fcedace 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -20,6 +20,9 @@
namespace Tegra {
namespace Engines {
+/// Converts a Maxwell3D::Regs field name into its register index, i.e. the field's byte offset
+/// inside Regs divided by the size of one 32-bit register word. Fully qualified so that it can be
+/// used from outside the Tegra::Engines namespace.
+#define MAXWELL3D_REG_INDEX(field_name) \
+ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
+
class Maxwell3D final {
public:
explicit Maxwell3D(MemoryManager& memory_manager);
@@ -248,6 +251,52 @@ public:
Patches = 0xe,
};
+ /// Element type selected by index_array.format; index_array.FormatSizeInBytes() maps these
+ /// to 1, 2 and 4 bytes respectively.
+ enum class IndexFormat : u32 {
+ UnsignedByte = 0x0,
+ UnsignedShort = 0x1,
+ UnsignedInt = 0x2,
+ };
+
+ /// Blend register block. It is seven 32-bit words long (one u32 plus six u32-backed enums) and
+ /// is embedded in the register file as the `blend` member.
+ struct Blend {
+ // Blend equation selector; the numeric values are the raw register encodings.
+ enum class Equation : u32 {
+ Add = 1,
+ Subtract = 2,
+ ReverseSubtract = 3,
+ Min = 4,
+ Max = 5,
+ };
+
+ // Blend factor selector; the numeric values are the raw register encodings. Note that the
+ // encoding is not contiguous (0x1-0xb, 0x10-0x13, 0x61-0x64).
+ enum class Factor : u32 {
+ Zero = 0x1,
+ One = 0x2,
+ SourceColor = 0x3,
+ OneMinusSourceColor = 0x4,
+ SourceAlpha = 0x5,
+ OneMinusSourceAlpha = 0x6,
+ DestAlpha = 0x7,
+ OneMinusDestAlpha = 0x8,
+ DestColor = 0x9,
+ OneMinusDestColor = 0xa,
+ SourceAlphaSaturate = 0xb,
+ Source1Color = 0x10,
+ OneMinusSource1Color = 0x11,
+ Source1Alpha = 0x12,
+ OneMinusSource1Alpha = 0x13,
+ ConstantColor = 0x61,
+ OneMinusConstantColor = 0x62,
+ ConstantAlpha = 0x63,
+ OneMinusConstantAlpha = 0x64,
+ };
+
+ // Field order below is the register layout; do not reorder.
+ // NOTE(review): separate_alpha presumably selects whether the *_a fields are used instead of
+ // the *_rgb ones for the alpha channel — confirm against the consumer of this struct.
+ u32 separate_alpha;
+ Equation equation_rgb;
+ Factor factor_source_rgb;
+ Factor factor_dest_rgb;
+ Equation equation_a;
+ Factor factor_source_a;
+ Factor factor_dest_a;
+ };
+
union {
struct {
INSERT_PADDING_WORDS(0x200);
@@ -270,7 +319,15 @@ public:
}
} rt[NumRenderTargets];
- INSERT_PADDING_WORDS(0x80);
+ // Per-viewport transform registers. Each entry is 8 words: six value words followed by two
+ // words of padding. The array takes the place of the former INSERT_PADDING_WORDS(0x80).
+ struct {
+ f32 scale_x;
+ f32 scale_y;
+ f32 scale_z;
+ // NOTE(review): the translate_* words are declared u32 while the scale_* words are f32 —
+ // confirm whether consumers are expected to reinterpret these bits as floats.
+ u32 translate_x;
+ u32 translate_y;
+ u32 translate_z;
+ INSERT_PADDING_WORDS(2);
+ } viewport_transform[NumViewports];
struct {
union {
@@ -375,7 +432,42 @@ public:
};
} draw;
- INSERT_PADDING_WORDS(0x139);
+ INSERT_PADDING_WORDS(0x6B);
+
+    /// Index buffer registers: 64-bit start/end GPU addresses split into high/low words, the
+    /// element format, and the first/count words.
+    struct {
+        u32 start_addr_high;
+        u32 start_addr_low;
+        u32 end_addr_high;
+        u32 end_addr_low;
+        IndexFormat format;
+        u32 first;
+        u32 count;
+
+        /// Size in bytes of a single index for the currently selected format.
+        unsigned FormatSizeInBytes() const {
+            if (format == IndexFormat::UnsignedByte) {
+                return 1;
+            }
+            if (format == IndexFormat::UnsignedShort) {
+                return 2;
+            }
+            if (format == IndexFormat::UnsignedInt) {
+                return 4;
+            }
+            // No other format value is known.
+            UNREACHABLE();
+        }
+
+        /// GPU virtual address of the first byte of the index buffer.
+        GPUVAddr StartAddress() const {
+            const GPUVAddr upper_bits = static_cast<GPUVAddr>(start_addr_high) << 32;
+            return static_cast<GPUVAddr>(upper_bits | start_addr_low);
+        }
+
+        /// GPU virtual address assembled from the end_addr_high/end_addr_low words.
+        GPUVAddr EndAddress() const {
+            const GPUVAddr upper_bits = static_cast<GPUVAddr>(end_addr_high) << 32;
+            return static_cast<GPUVAddr>(upper_bits | end_addr_low);
+        }
+    } index_array;
+
+ INSERT_PADDING_WORDS(0xC7);
+
struct {
u32 query_address_high;
u32 query_address_low;
@@ -410,7 +502,9 @@ public:
}
} vertex_array[NumVertexArrays];
- INSERT_PADDING_WORDS(0x40);
+ Blend blend;
+
+ INSERT_PADDING_WORDS(0x39);
struct {
u32 limit_high;
@@ -427,14 +521,11 @@ public:
BitField<0, 1, u32> enable;
BitField<4, 4, ShaderProgram> program;
};
- u32 start_id;
- INSERT_PADDING_WORDS(1);
- u32 gpr_alloc;
- ShaderStage type;
- INSERT_PADDING_WORDS(9);
+ u32 offset;
+ INSERT_PADDING_WORDS(14);
} shader_config[MaxShaderProgram];
- INSERT_PADDING_WORDS(0x8C);
+ INSERT_PADDING_WORDS(0x80);
struct {
u32 cb_size;
@@ -507,6 +598,7 @@ public:
};
State state{};
+ MemoryManager& memory_manager;
/// Reads a register value located at the input method address
u32 GetRegisterValue(u32 method) const;
@@ -520,9 +612,10 @@ public:
/// Returns a list of enabled textures for the specified shader stage.
std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
-private:
- MemoryManager& memory_manager;
+ /// Returns whether the specified shader stage is enabled or not.
+ bool IsShaderStageEnabled(Regs::ShaderStage stage) const;
+private:
std::unordered_map<u32, std::vector<u32>> uploaded_macros;
/// Macro method that is currently being executed / being fed parameters.
@@ -564,6 +657,7 @@ private:
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(rt, 0x200);
+ASSERT_REG_POSITION(viewport_transform[0], 0x280);
ASSERT_REG_POSITION(viewport, 0x300);
ASSERT_REG_POSITION(vertex_buffer, 0x35D);
ASSERT_REG_POSITION(zeta, 0x3F8);
@@ -573,8 +667,10 @@ ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
+ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(query, 0x6C0);
ASSERT_REG_POSITION(vertex_array[0], 0x700);
+ASSERT_REG_POSITION(blend, 0x780);
ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
ASSERT_REG_POSITION(shader_config[0], 0x800);
ASSERT_REG_POSITION(const_buffer, 0x8E0);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
new file mode 100644
index 000000000..5a006aee5
--- /dev/null
+++ b/src/video_core/engines/shader_bytecode.h
@@ -0,0 +1,439 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <bitset>
+#include <cstring>
+#include <map>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <vector>
+
+#include <boost/optional.hpp>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+namespace Shader {
+
+/// Thin wrapper around the numeric index of a shader register. It converts implicitly to and
+/// from u64 and offers the few arithmetic/bitwise operators declared below.
+struct Register {
+    /// Index of the register that is special cased to always be 0.
+    static constexpr size_t ZeroIndex = 255;
+
+    constexpr Register() = default;
+
+    // Intentionally implicit: allows construction straight from a raw integer value.
+    constexpr Register(u64 value) : value{value} {}
+
+    /// Yields the raw register index.
+    constexpr operator u64() const {
+        return value;
+    }
+
+    constexpr u64 operator~() const {
+        return ~value;
+    }
+
+    constexpr u64 operator&(const Register& rhs) const {
+        return value & rhs.value;
+    }
+
+    template <typename Rhs>
+    constexpr u64 operator&(const Rhs& rhs) const {
+        return value & rhs;
+    }
+
+    template <typename Rhs>
+    constexpr u64 operator-(const Rhs& rhs) const {
+        return value - rhs;
+    }
+
+private:
+    u64 value{};
+};
+
+/// View over a 64-bit word exposing the attribute-related bit fields. Instruction embeds one of
+/// these as its `attribute` member, so the bit positions refer to the instruction word.
+union Attribute {
+ Attribute() = default;
+
+ constexpr explicit Attribute(u64 value) : value(value) {}
+
+ // Known attribute indices.
+ // NOTE(review): presumably further generic attributes follow Attribute_0 — confirm.
+ enum class Index : u64 {
+ Position = 7,
+ Attribute_0 = 8,
+ };
+
+ // Layout with the 2-bit element at bit 22, the 6-bit index at bit 24 and a 3-bit size at
+ // bit 47.
+ union {
+ BitField<22, 2, u64> element;
+ BitField<24, 6, Index> index;
+ BitField<47, 3, u64> size;
+ } fmt20;
+
+ // Alternative layout with the 2-bit element at bit 30 and the 6-bit index at bit 32.
+ // NOTE(review): which layout applies to which instruction is not visible here.
+ union {
+ BitField<30, 2, u64> element;
+ BitField<32, 6, Index> index;
+ } fmt28;
+
+ // 8-bit register field at bit 39, shared by both layouts.
+ BitField<39, 8, u64> reg;
+ // Raw 64-bit word that all of the fields above alias.
+ u64 value{};
+};
+
+/// View over a 64-bit word exposing the sampler index bit field. Instruction embeds one of these
+/// as its `sampler` member.
+union Sampler {
+ Sampler() = default;
+
+ constexpr explicit Sampler(u64 value) : value(value) {}
+
+ // Known sampler indices.
+ // NOTE(review): presumably further samplers follow Sampler_0 — confirm.
+ enum class Index : u64 {
+ Sampler_0 = 8,
+ };
+
+ // 13-bit sampler index starting at bit 36.
+ BitField<36, 13, Index> index;
+ // Raw 64-bit word that the field above aliases.
+ u64 value{};
+};
+
+/// View over a 64-bit word exposing the uniform operand bit fields. Instruction embeds one of
+/// these as its `uniform` member.
+union Uniform {
+ // 14-bit offset starting at bit 20.
+ BitField<20, 14, u64> offset;
+ // 5-bit index starting at bit 34.
+ BitField<34, 5, u64> index;
+};
+
+} // namespace Shader
+} // namespace Tegra
+
+namespace std {
+
+// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330.
+// Both specializations map the type onto itself.
+// NOTE(review): presumably needed so that BitField can be instantiated with these class types —
+// confirm against common/bit_field.h.
+template <>
+struct make_unsigned<Tegra::Shader::Attribute> {
+ using type = Tegra::Shader::Attribute;
+};
+
+template <>
+struct make_unsigned<Tegra::Shader::Register> {
+ using type = Tegra::Shader::Register;
+};
+
+} // namespace std
+
+namespace Tegra {
+namespace Shader {
+
+/// Special values of the 4-bit predicate field (Instruction::pred.full_pred).
+/// NOTE(review): meanings are taken from the enumerator names only — confirm.
+enum class Pred : u64 {
+ UnusedIndex = 0x7,
+ NeverExecute = 0xF,
+};
+
+/// Comparison selected by the 4-bit Instruction::fsetp.cond field. Only the conditions listed
+/// here are known so far.
+enum class PredCondition : u64 {
+ LessThan = 1,
+ Equal = 2,
+ LessEqual = 3,
+ GreaterThan = 4,
+ NotEqual = 5,
+ GreaterEqual = 6,
+ // TODO(Subv): Other condition types
+};
+
+/// Logical operation selected by the 2-bit Instruction::fsetp.op field.
+enum class PredOperation : u64 {
+ And = 0,
+ Or = 1,
+ Xor = 2,
+};
+
+/// Operation selected by the 7-bit Instruction::sub_op field.
+/// NOTE(review): presumably the function selector of the MUFU instruction — confirm.
+enum class SubOp : u64 {
+ Cos = 0x0,
+ Sin = 0x1,
+ Ex2 = 0x2,
+ Lg2 = 0x3,
+ Rcp = 0x4,
+ Rsq = 0x5,
+ Min = 0x8,
+};
+
+/// A single 64-bit shader instruction word. Every member is an overlapping view of the same
+/// `value`; which views are meaningful depends on the decoded opcode.
+union Instruction {
+ // Copies the raw 64-bit word.
+ Instruction& operator=(const Instruction& instr) {
+ value = instr.value;
+ return *this;
+ }
+
+ constexpr Instruction(u64 value) : value{value} {}
+
+ // 8-bit register fields, named after the bit position at which they start.
+ BitField<0, 8, Register> gpr0;
+ BitField<8, 8, Register> gpr8;
+ // Predicate field: the full 4-bit value, or only its low 3 bits as an index.
+ union {
+ BitField<16, 4, Pred> full_pred;
+ BitField<16, 3, u64> pred_index;
+ } pred;
+ // Bit 19 is the top bit of full_pred, exposed separately.
+ BitField<19, 1, u64> negate_pred;
+ BitField<20, 8, Register> gpr20;
+ BitField<20, 7, SubOp> sub_op;
+ BitField<28, 8, Register> gpr28;
+ BitField<39, 8, Register> gpr39;
+ // Upper 16 bits of the word; this is what OpCode::Decode matches against.
+ BitField<48, 16, u64> opcode;
+
+ // Operand modifiers and immediates for arithmetic instructions.
+ union {
+ BitField<20, 19, u64> imm20_19;
+ BitField<20, 32, u64> imm20_32;
+ BitField<45, 1, u64> negate_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<48, 1, u64> negate_a;
+ BitField<49, 1, u64> abs_b;
+ BitField<50, 1, u64> abs_d;
+ BitField<56, 1, u64> negate_imm;
+
+ // Builds a float from the 19-bit immediate: the bits are shifted up by 12 so they occupy
+ // bits 12..30 of a 32-bit pattern, negate_imm supplies bit 31, and the pattern is then
+ // reinterpreted as a float via memcpy.
+ float GetImm20_19() const {
+ float result{};
+ u32 imm{static_cast<u32>(imm20_19)};
+ imm <<= 12;
+ imm |= negate_imm ? 0x80000000 : 0;
+ std::memcpy(&result, &imm, sizeof(imm));
+ return result;
+ }
+
+ // Reinterprets the 32-bit immediate bit pattern as a float via memcpy.
+ float GetImm20_32() const {
+ float result{};
+ u32 imm{static_cast<u32>(imm20_32)};
+ std::memcpy(&result, &imm, sizeof(imm));
+ return result;
+ }
+ } alu;
+
+ // Operand negation flags for the FFMA view.
+ union {
+ BitField<48, 1, u64> negate_b;
+ BitField<49, 1, u64> negate_c;
+ } ffma;
+
+ // Fields of the FSETP view: predicate indices, operand modifiers, the combining operation
+ // and the comparison condition.
+ union {
+ BitField<0, 3, u64> pred0;
+ BitField<3, 3, u64> pred3;
+ BitField<7, 1, u64> abs_a;
+ BitField<39, 3, u64> pred39;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> neg_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, PredOperation> op;
+ BitField<47, 1, u64> ftz;
+ BitField<48, 4, PredCondition> cond;
+ BitField<56, 1, u64> neg_b;
+ } fsetp;
+
+ // Single-bit flags.
+ // NOTE(review): by their names these describe the kind of the B/C operands — confirm.
+ BitField<61, 1, u64> is_b_imm;
+ BitField<60, 1, u64> is_b_gpr;
+ BitField<59, 1, u64> is_c_gpr;
+
+ // Structured views over the same word (see the respective unions).
+ Attribute attribute;
+ Uniform uniform;
+ Sampler sampler;
+
+ // Raw instruction word.
+ u64 value;
+};
+static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size");
+static_assert(std::is_standard_layout<Instruction>::value,
+ "Structure does not have standard layout");
+
+class OpCode {
+public:
+ /// Identifier of every instruction encoding the decoder recognises. Each entry has a
+ /// matching row in GetDecodeTable().
+ enum class Id {
+ KIL,
+ LD_A,
+ ST_A,
+ TEXQ, // Texture Query
+ TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
+ TLDS, // Texture Load with scalar/non-vec4 source/destinations
+ EXIT,
+ IPA,
+ FFMA_IMM, // Fused Multiply and Add
+ FFMA_CR,
+ FFMA_RC,
+ FFMA_RR,
+ FADD_C,
+ FADD_R,
+ FADD_IMM,
+ FMUL_C,
+ FMUL_R,
+ FMUL_IMM,
+ FMUL32_IMM,
+ MUFU, // Multi-Function Operator
+ RRO, // Range Reduction Operator
+ F2F_C,
+ F2F_R,
+ F2F_IMM,
+ F2I_C,
+ F2I_R,
+ F2I_IMM,
+ I2F_C,
+ I2F_R,
+ I2F_IMM,
+ LOP32I,
+ MOV_C,
+ MOV_R,
+ MOV_IMM,
+ MOV32I,
+ SHR_C,
+ SHR_R,
+ SHR_IMM,
+ FSETP_C, // Set Predicate
+ FSETP_R,
+ FSETP_IMM,
+ ISETP_C,
+ ISETP_IMM,
+ ISETP_R,
+ };
+
+ /// Coarse category assigned to each instruction in GetDecodeTable().
+ enum class Type {
+ Trivial,
+ Arithmetic,
+ Ffma,
+ Flow,
+ Memory,
+ FloatPredicate,
+ IntegerPredicate,
+ Unknown,
+ };
+
+    /// Describes one instruction encoding: a bit mask, the value expected under that mask, and
+    /// the name, id and category of the instruction it identifies.
+    class Matcher {
+    public:
+        Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type)
+            : name(name), mask(mask), expected(expected), id(id), type(type) {}
+
+        /**
+         * Tests to see if the given instruction is the instruction this matcher represents.
+         * @param instruction The 16 opcode bits of the instruction to test
+         * @returns true if the bits selected by the mask equal the expected pattern.
+         */
+        bool Matches(u16 instruction) const {
+            const auto relevant_bits = instruction & mask;
+            return relevant_bits == expected;
+        }
+
+        /// Human-readable mnemonic of the instruction.
+        const char* GetName() const {
+            return name;
+        }
+
+        /// Mask selecting the opcode bits this matcher constrains.
+        u16 GetMask() const {
+            return mask;
+        }
+
+        /// Identifier of the matched instruction.
+        Id GetId() const {
+            return id;
+        }
+
+        /// Category of the matched instruction.
+        Type GetType() const {
+            return type;
+        }
+
+    private:
+        const char* name;
+        u16 mask;
+        u16 expected;
+        Id id;
+        Type type;
+    };
+
+    /// Looks up the matcher for an instruction. The table is built once on first use; the first
+    /// entry whose pattern fits the instruction's opcode bits wins. Returns boost::none when no
+    /// entry matches.
+    static boost::optional<const Matcher&> Decode(Instruction instr) {
+        static const auto table{GetDecodeTable()};
+
+        for (const auto& candidate : table) {
+            if (candidate.Matches(static_cast<u16>(instr.opcode))) {
+                return boost::optional<const Matcher&>(candidate);
+            }
+        }
+        return boost::none;
+    }
+
+private:
+    /// Helpers that turn a textual bit pattern into a Matcher.
+    struct Detail {
+    private:
+        static constexpr size_t opcode_bitsize = 16;
+
+        /**
+         * Generates the mask and the expected value after masking from a given bitstring.
+         * The first character describes the most significant bit.
+         * A '0' in a bitstring indicates that a zero must be present at that bit position.
+         * A '1' in a bitstring indicates that a one must be present at that bit position.
+         * Any other character leaves the bit unconstrained.
+         */
+        static auto GetMaskAndExpect(const char* const bitstring) {
+            u16 mask = 0, expect = 0;
+            for (size_t i = 0; i < opcode_bitsize; i++) {
+                const char symbol = bitstring[i];
+                if (symbol != '0' && symbol != '1') {
+                    // Ignore
+                    continue;
+                }
+
+                const size_t bit_position = opcode_bitsize - i - 1;
+                mask |= 1 << bit_position;
+                if (symbol == '1') {
+                    expect |= 1 << bit_position;
+                }
+            }
+            return std::make_tuple(mask, expect);
+        }
+
+    public:
+        /// Creates a matcher that can match and parse instructions based on bitstring.
+        static auto GetMatcher(const char* const bitstring, OpCode::Id op, OpCode::Type type,
+                               const char* const name) {
+            const auto pattern = GetMaskAndExpect(bitstring);
+            const auto mask = std::get<0>(pattern);
+            const auto expected = std::get<1>(pattern);
+            return Matcher(name, mask, expected, op, type);
+        }
+    };
+
+ /// Builds the decode table used by Decode(). Each row gives the 16 opcode bits as a string
+ /// ('0'/'1' are fixed bits, '-' is a don't-care bit), followed by the instruction id, its
+ /// category and its printable name. The result is ordered from most to least specific.
+ static std::vector<Matcher> GetDecodeTable() {
+ std::vector<Matcher> table = {
+// Local shorthand for one table row; undefined again right after the table.
+#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
+ INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
+ INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
+ INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
+ INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
+ INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
+ INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
+ INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
+ INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
+ INST("001100101-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
+ INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
+ INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
+ INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"),
+ INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"),
+ INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"),
+ INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"),
+ INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
+ INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
+ INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
+ INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"),
+ INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
+ INST("0101110010010---", Id::RRO, Type::Arithmetic, "RRO"),
+ INST("0100110010101---", Id::F2F_C, Type::Arithmetic, "F2F_C"),
+ INST("0101110010101---", Id::F2F_R, Type::Arithmetic, "F2F_R"),
+ INST("0011100-10101---", Id::F2F_IMM, Type::Arithmetic, "F2F_IMM"),
+ INST("0100110010110---", Id::F2I_C, Type::Arithmetic, "F2I_C"),
+ INST("0101110010110---", Id::F2I_R, Type::Arithmetic, "F2I_R"),
+ INST("0011100-10110---", Id::F2I_IMM, Type::Arithmetic, "F2I_IMM"),
+ INST("0100110010111---", Id::I2F_C, Type::Arithmetic, "I2F_C"),
+ INST("0101110010111---", Id::I2F_R, Type::Arithmetic, "I2F_R"),
+ INST("0011100-10111---", Id::I2F_IMM, Type::Arithmetic, "I2F_IMM"),
+ INST("000001----------", Id::LOP32I, Type::Arithmetic, "LOP32I"),
+ INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
+ INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
+ INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
+ INST("000000010000----", Id::MOV32I, Type::Arithmetic, "MOV32I"),
+ INST("0100110000101---", Id::SHR_C, Type::Arithmetic, "SHR_C"),
+ INST("0101110000101---", Id::SHR_R, Type::Arithmetic, "SHR_R"),
+ INST("0011100-00101---", Id::SHR_IMM, Type::Arithmetic, "SHR_IMM"),
+ INST("010010111011----", Id::FSETP_C, Type::FloatPredicate, "FSETP_C"),
+ INST("010110111011----", Id::FSETP_R, Type::FloatPredicate, "FSETP_R"),
+ INST("0011011-1011----", Id::FSETP_IMM, Type::FloatPredicate, "FSETP_IMM"),
+ INST("010010110110----", Id::ISETP_C, Type::IntegerPredicate, "ISETP_C"),
+ INST("010110110110----", Id::ISETP_R, Type::IntegerPredicate, "ISETP_R"),
+ INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerPredicate, "ISETP_IMM"),
+ };
+#undef INST
+ // Decode() returns the first matching entry, so order the table by specificity. The sort is
+ // stable, so entries with the same number of fixed bits keep their order from the list above.
+ std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
+ // If a matcher has more bits in its mask it is more specific, so it
+ // should come first.
+ return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count();
+ });
+
+ return table;
+ }
+};
+
+} // namespace Shader
+} // namespace Tegra
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 71a8661b4..2888daedc 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -15,7 +15,10 @@ namespace Tegra {
enum class RenderTargetFormat : u32 {
NONE = 0x0,
+ RGBA16_FLOAT = 0xCA,
+ RGB10_A2_UNORM = 0xD1,
RGBA8_UNORM = 0xD5,
+ RGBA8_SRGB = 0xD6,
};
class DebugContext;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 35d262189..36629dd11 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -19,7 +19,7 @@ public:
virtual void DrawArrays() = 0;
/// Notify rasterizer that the specified Maxwell register has been changed
- virtual void NotifyMaxwellRegisterChanged(u32 id) = 0;
+ virtual void NotifyMaxwellRegisterChanged(u32 method) = 0;
/// Notify rasterizer that all caches should be flushed to Switch memory
virtual void FlushAll() = 0;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f217a265b..2d4a0d6db 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <memory>
#include <string>
#include <tuple>
@@ -13,7 +14,6 @@
#include "common/math_util.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
-#include "common/vector_math.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
#include "core/settings.h"
@@ -34,33 +34,7 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
-enum class UniformBindings : GLuint { Common, VS, FS };
-
-static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding,
- size_t expected_size) {
- GLuint ub_index = glGetUniformBlockIndex(shader, name);
- if (ub_index != GL_INVALID_INDEX) {
- GLint ub_size = 0;
- glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
- ASSERT_MSG(ub_size == expected_size,
- "Uniform block size did not match! Got %d, expected %zu",
- static_cast<int>(ub_size), expected_size);
- glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
- }
-}
-
-static void SetShaderUniformBlockBindings(GLuint shader) {
- SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common,
- sizeof(RasterizerOpenGL::UniformData));
- SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS,
- sizeof(RasterizerOpenGL::VSUniformData));
- SetShaderUniformBlockBinding(shader, "fs_config", UniformBindings::FS,
- sizeof(RasterizerOpenGL::FSUniformData));
-}
-
RasterizerOpenGL::RasterizerOpenGL() {
- shader_dirty = true;
-
has_ARB_buffer_storage = false;
has_ARB_direct_state_access = false;
has_ARB_separate_shader_objects = false;
@@ -72,6 +46,14 @@ RasterizerOpenGL::RasterizerOpenGL() {
state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
}
+ // Create SSBOs
+ for (size_t stage = 0; stage < ssbos.size(); ++stage) {
+ for (size_t buffer = 0; buffer < ssbos[stage].size(); ++buffer) {
+ ssbos[stage][buffer].Create();
+ state.draw.const_buffers[stage][buffer].ssbo = ssbos[stage][buffer].handle;
+ }
+ }
+
GLint ext_num;
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num);
for (GLint i = 0; i < ext_num; i++) {
@@ -88,6 +70,8 @@ RasterizerOpenGL::RasterizerOpenGL() {
}
}
+ ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported");
+
// Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
state.clip_distance[0] = true;
@@ -102,36 +86,30 @@ RasterizerOpenGL::RasterizerOpenGL() {
state.draw.uniform_buffer = uniform_buffer.handle;
state.Apply();
- glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), nullptr, GL_STATIC_DRAW);
- glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle);
-
- uniform_block_data.dirty = true;
-
// Create render framebuffer
framebuffer.Create();
- if (has_ARB_separate_shader_objects) {
- hw_vao.Create();
- hw_vao_enabled_attributes.fill(false);
+ hw_vao.Create();
+ hw_vao_enabled_attributes.fill(false);
- stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER);
- stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2);
- state.draw.vertex_buffer = stream_buffer->GetHandle();
+ stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER);
+ stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2);
+ state.draw.vertex_buffer = stream_buffer->GetHandle();
- pipeline.Create();
- state.draw.program_pipeline = pipeline.handle;
- state.draw.shader_program = 0;
- state.draw.vertex_array = hw_vao.handle;
- state.Apply();
+ shader_program_manager = std::make_unique<GLShader::ProgramManager>();
+ state.draw.shader_program = 0;
+ state.draw.vertex_array = hw_vao.handle;
+ state.Apply();
- glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle());
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle());
- vs_uniform_buffer.Create();
- glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle);
- glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY);
- glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle);
- } else {
- UNREACHABLE();
+ for (unsigned index = 0; index < uniform_buffers.size(); ++index) {
+ auto& buffer = uniform_buffers[index];
+ buffer.Create();
+ glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle);
+ glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr,
+ GL_STREAM_COPY);
+ glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle);
}
accelerate_draw = AccelDraw::Disabled;
@@ -149,17 +127,6 @@ RasterizerOpenGL::~RasterizerOpenGL() {
}
}
-void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) {
- const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
-
- if (is_indexed) {
- UNREACHABLE();
- }
-
- // TODO(bunnei): Add support for 1+ vertex arrays
- vs_input_size = regs.vertex_buffer.count * regs.vertex_array[0].stride;
-}
-
void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
MICROPROFILE_SCOPE(OpenGL_VAO);
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
@@ -171,6 +138,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
// TODO(bunnei): Add support for 1+ vertex arrays
const auto& vertex_array{regs.vertex_array[0]};
+ const auto& vertex_array_limit{regs.vertex_array_limit[0]};
ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?");
ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!");
for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) {
@@ -183,6 +151,10 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
// to avoid OpenGL errors.
for (unsigned index = 0; index < 16; ++index) {
auto& attrib = regs.vertex_attrib_format[index];
+ NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
+ index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
+ attrib.offset.Value(), attrib.IsNormalized());
+
glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride,
reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset));
@@ -191,7 +163,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
}
// Copy vertex array data
- const u32 data_size{vertex_array.stride * regs.vertex_buffer.count};
+ const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1};
const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())};
res_cache.FlushRegion(data_addr, data_size, nullptr);
Memory::ReadBlock(data_addr, array_ptr, data_size);
@@ -200,26 +172,89 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
buffer_offset += data_size;
}
-void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) {
- MICROPROFILE_SCOPE(OpenGL_VS);
- LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader.");
- glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_shader->shader.handle);
-}
+/// Uploads the per-stage uniform data and selects the GLSL programs for every enabled Maxwell
+/// shader stage, then configures the const buffers each stage uses.
+/// @param buffer_ptr CPU pointer to the start of the region being filled for this draw.
+/// @param buffer_offset Offset of that region inside the stream buffer; used as the source offset
+///                      of the uniform buffer copies.
+/// @param ptr_pos Byte position inside the region at which the per-stage uniform space begins.
+void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) {
+    // Helper function for uploading uniform data
+    const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
+        if (has_ARB_direct_state_access) {
+            glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size);
+        } else {
+            glBindBuffer(GL_COPY_WRITE_BUFFER, handle);
+            glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size);
+        }
+    };
-void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) {
- MICROPROFILE_SCOPE(OpenGL_FS);
- UNREACHABLE();
-}
+    // GetInstance() is a static accessor; call it on the type instead of on a temporary System.
+    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!");
-bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
- if (!has_ARB_separate_shader_objects) {
- UNREACHABLE();
- return false;
+    // Next available bindpoint to use when uploading the const buffers to the GLSL shaders.
+    u32 current_constbuffer_bindpoint = 0;
+
+    for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
+        auto& shader_config = gpu.regs.shader_config[index];
+        const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
+
+        // Program 0 (VertexA) is not handled by this loop, so program N maps to stage N - 1.
+        const size_t stage = index - 1;
+        const auto shader_stage = static_cast<Maxwell::ShaderStage>(stage);
+
+        // Skip stages that are not enabled
+        if (!gpu.IsShaderStageEnabled(shader_stage)) {
+            continue;
+        }
+
+        // Each stage owns a fixed slot in the uniform space. DrawArrays reserves
+        // Maxwell::MaxShaderStage slots starting at ptr_pos, so the slot is addressed by stage
+        // number; advancing ptr_pos before the first use would push the last stage past the
+        // reserved space.
+        const size_t stage_pos = ptr_pos + stage * sizeof(GLShader::MaxwellUniformData);
+
+        // Fill in the uniform data before queueing the copy that reads it.
+        auto* const ub_ptr =
+            reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[stage_pos]);
+        ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]);
+
+        // Upload uniform data as one UBO per stage
+        const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(stage_pos);
+        copy_buffer(uniform_buffers[stage].handle, ubo_offset,
+                    sizeof(GLShader::MaxwellUniformData));
+
+        // Fetch program code from memory
+        GLShader::ProgramCode program_code;
+        const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
+        const VAddr cpu_address{gpu.memory_manager.PhysicalToVirtualAddress(gpu_address)};
+        Memory::ReadBlock(cpu_address, program_code.data(), program_code.size() * sizeof(u64));
+        GLShader::ShaderSetup setup{std::move(program_code)};
+
+        GLShader::ShaderEntries shader_resources;
+
+        switch (program) {
+        case Maxwell::ShaderProgram::VertexB: {
+            GLShader::MaxwellVSConfig vs_config{setup};
+            shader_resources =
+                shader_program_manager->UseProgrammableVertexShader(vs_config, setup);
+            break;
+        }
+        case Maxwell::ShaderProgram::Fragment: {
+            GLShader::MaxwellFSConfig fs_config{setup};
+            shader_resources =
+                shader_program_manager->UseProgrammableFragmentShader(fs_config, setup);
+            break;
+        }
+        default:
+            LOG_CRITICAL(HW_GPU, "Unimplemented shader index=%d, enable=%d, offset=0x%08X", index,
+                         shader_config.enable.Value(), shader_config.offset);
+            UNREACHABLE();
+        }
+
+        const GLuint gl_stage_program =
+            shader_program_manager->GetCurrentProgramStage(shader_stage);
+
+        // Configure the const buffers for this shader stage.
+        current_constbuffer_bindpoint =
+            SetupConstBuffers(shader_stage, gl_stage_program, current_constbuffer_bindpoint,
+                              shader_resources.const_buffer_entries);
     }
+    shader_program_manager->UseTrivialGeometryShader();
+}
+
+bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
DrawArrays();
-
return true;
}
@@ -255,18 +290,18 @@ void RasterizerOpenGL::DrawArrays() {
: (depth_surface == nullptr ? 1u : depth_surface->res_scale);
MathUtil::Rectangle<u32> draw_rect{
- static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.left) +
- viewport_rect.left * res_scale,
- surfaces_rect.left, surfaces_rect.right)), // Left
- static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
- viewport_rect.top * res_scale,
- surfaces_rect.bottom, surfaces_rect.top)), // Top
- static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.left) +
- viewport_rect.right * res_scale,
- surfaces_rect.left, surfaces_rect.right)), // Right
- static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
- viewport_rect.bottom * res_scale,
- surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
+ static_cast<u32>(
+ std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale,
+ surfaces_rect.left, surfaces_rect.right)), // Left
+ static_cast<u32>(
+ std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale,
+ surfaces_rect.bottom, surfaces_rect.top)), // Top
+ static_cast<u32>(
+ std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale,
+ surfaces_rect.left, surfaces_rect.right)), // Right
+ static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
+ viewport_rect.bottom * res_scale,
+ surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
// Bind the framebuffer surfaces
BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);
@@ -280,18 +315,6 @@ void RasterizerOpenGL::DrawArrays() {
// Sync and bind the texture surfaces
BindTextures();
- // Sync and bind the shader
- if (shader_dirty) {
- SetShader();
- shader_dirty = false;
- }
-
- // Sync the uniform data
- if (uniform_block_data.dirty) {
- glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(UniformData), &uniform_block_data.data);
- uniform_block_data.dirty = false;
- }
-
// Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
// scissor test to prevent drawing outside of the framebuffer region
state.scissor.enabled = true;
@@ -303,15 +326,22 @@ void RasterizerOpenGL::DrawArrays() {
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
- AnalyzeVertexArray(is_indexed);
+ const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
+ const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count};
+
+ // TODO(bunnei): Add support for 1+ vertex arrays
+ vs_input_size = vertex_num * regs.vertex_array[0].stride;
+
state.draw.vertex_buffer = stream_buffer->GetHandle();
state.Apply();
size_t buffer_size = static_cast<size_t>(vs_input_size);
if (is_indexed) {
- UNREACHABLE();
+ buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size;
}
- buffer_size += sizeof(VSUniformData);
+
+ // Uniform space for the 5 shader stages
+ buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage;
size_t ptr_pos = 0;
u8* buffer_ptr;
@@ -322,36 +352,37 @@ void RasterizerOpenGL::DrawArrays() {
SetupVertexArray(buffer_ptr, buffer_offset);
ptr_pos += vs_input_size;
+ // If indexed mode, copy the index buffer
GLintptr index_buffer_offset = 0;
if (is_indexed) {
- UNREACHABLE();
- }
+ ptr_pos = Common::AlignUp(ptr_pos, 4);
- SetupVertexShader(reinterpret_cast<VSUniformData*>(&buffer_ptr[ptr_pos]),
- buffer_offset + static_cast<GLintptr>(ptr_pos));
- const GLintptr vs_ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
- ptr_pos += sizeof(VSUniformData);
+ const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager;
+ const VAddr index_data_addr{
+ memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())};
+ Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size);
- stream_buffer->Unmap();
+ index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
+ ptr_pos += index_buffer_size;
+ }
- const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
- if (has_ARB_direct_state_access) {
- glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size);
- } else {
- glBindBuffer(GL_COPY_WRITE_BUFFER, handle);
- glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size);
- }
- };
+ SetupShaders(buffer_ptr, buffer_offset, ptr_pos);
- copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(VSUniformData));
+ stream_buffer->Unmap();
- glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_shader->shader.handle);
+ shader_program_manager->ApplyTo(state);
+ state.Apply();
+ const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)};
if (is_indexed) {
- UNREACHABLE();
+ const GLint index_min{static_cast<GLint>(regs.index_array.first)};
+ const GLint index_max{static_cast<GLint>(regs.index_array.first + regs.index_array.count)};
+ glDrawRangeElementsBaseVertex(primitive_mode, index_min, index_max, regs.index_array.count,
+ MaxwellToGL::IndexFormat(regs.index_array.format),
+ reinterpret_cast<const void*>(index_buffer_offset),
+ -index_min);
} else {
- glDrawArrays(MaxwellToGL::PrimitiveTopology(regs.draw.topology), 0,
- regs.vertex_buffer.count);
+ glDrawArrays(primitive_mode, 0, regs.vertex_buffer.count);
}
// Disable scissor test
@@ -384,7 +415,7 @@ void RasterizerOpenGL::DrawArrays() {
void RasterizerOpenGL::BindTextures() {
using Regs = Tegra::Engines::Maxwell3D::Regs;
- auto maxwell3d = Core::System::GetInstance().GPU().Get3DEngine();
+ auto& maxwell3d = Core::System::GetInstance().GPU().Get3DEngine();
// Each Maxwell shader stage can have an arbitrary number of textures, but we're limited to a
// certain number in OpenGL. We try to only use the minimum amount of host textures by not
@@ -415,7 +446,32 @@ void RasterizerOpenGL::BindTextures() {
}
}
-void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 id) {}
+/// Mirrors a changed Maxwell3D register into the cached OpenGL state.
+/// @param method Value compared against the MAXWELL3D_REG_INDEX(...) constants below; only the
+///               blend registers are handled, any other value is ignored.
+void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {
+    // GetInstance() is a static accessor; call it on the type instead of on a temporary System.
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    switch (method) {
+    case MAXWELL3D_REG_INDEX(blend.separate_alpha):
+        ASSERT_MSG(false, "unimplemented");
+        break;
+    case MAXWELL3D_REG_INDEX(blend.equation_rgb):
+        state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.blend.equation_rgb);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_source_rgb):
+        state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_dest_rgb):
+        state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.equation_a):
+        state.blend.a_equation = MaxwellToGL::BlendEquation(regs.blend.equation_a);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_source_a):
+        state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_a);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_dest_a):
+        state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_a);
+        break;
+    }
+}
void RasterizerOpenGL::FlushAll() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
@@ -467,9 +523,12 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu
src_params.width = std::min(framebuffer.width, pixel_stride);
src_params.height = framebuffer.height;
src_params.stride = pixel_stride;
- src_params.is_tiled = false;
+ src_params.is_tiled = true;
+ src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
src_params.pixel_format =
SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
+ src_params.component_type =
+ SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format);
src_params.UpdateParams();
MathUtil::Rectangle<u32> src_rect;
@@ -531,70 +590,53 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
}
}
-void RasterizerOpenGL::SetShader() {
- // TODO(bunnei): The below sets up a static test shader for passing untransformed vertices to
- // OpenGL for rendering. This should be removed/replaced when we start emulating Maxwell
- // shaders.
-
- static constexpr char vertex_shader[] = R"(
-#version 150 core
-
-in vec2 vert_position;
-in vec2 vert_tex_coord;
-out vec2 frag_tex_coord;
-
-void main() {
- // Multiply input position by the rotscale part of the matrix and then manually translate by
- // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
- // to `vec3(vert_position.xy, 1.0)`
- gl_Position = vec4(mat2(mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)) * vert_position + mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)[2], 0.0, 1.0);
- frag_tex_coord = vert_tex_coord;
-}
-)";
-
- static constexpr char fragment_shader[] = R"(
-#version 150 core
-
-in vec2 frag_tex_coord;
-out vec4 color;
-
-uniform sampler2D tex[32];
+/// Uploads the const buffers used by one shader stage and assigns their shader bindpoints.
+/// @param stage The shader stage whose buffers are configured.
+/// @param program The OpenGL program object queried for the storage block of each buffer.
+/// @param current_bindpoint First bindpoint to hand out to this stage.
+/// @param entries The buffers used by the shader; entry i receives current_bindpoint + i.
+/// @returns current_bindpoint advanced by the number of entries.
+u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint program,
+                                        u32 current_bindpoint,
+                                        const std::vector<GLShader::ConstBufferEntry>& entries) {
+    auto& gpu = Core::System::GetInstance().GPU();
+    auto& maxwell3d = gpu.Get3DEngine();
-void main() {
- color = texture(tex[0], frag_tex_coord);
-}
-)";
+    ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage),
+               "Attempted to upload constbuffer of disabled shader stage");
- if (current_shader) {
- return;
+    const size_t stage_index = static_cast<size_t>(stage);
+
+    // Reset all buffer draw state for this stage.
+    for (auto& buffer : state.draw.const_buffers[stage_index]) {
+        buffer.bindpoint = 0;
+        buffer.enabled = false;
     }
- LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader.");
-
- current_shader = &test_shader;
- if (has_ARB_separate_shader_objects) {
- test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true);
- glActiveShaderProgram(pipeline.handle, test_shader.shader.handle);
- } else {
- UNREACHABLE();
+    // Upload only the enabled buffers from the 16 constbuffers of each shader stage
+    auto& shader_stage = maxwell3d.state.shader_stages[stage_index];
+
+    for (size_t entry_index = 0; entry_index < entries.size(); ++entry_index) {
+        const auto& used_buffer = entries[entry_index];
+        const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
+        auto& buffer_draw_state = state.draw.const_buffers[stage_index][used_buffer.GetIndex()];
+
+        ASSERT_MSG(buffer.enabled, "Attempted to upload disabled constbuffer");
+        buffer_draw_state.enabled = true;
+        buffer_draw_state.bindpoint = current_bindpoint + static_cast<u32>(entry_index);
+
+        // Read the buffer contents from guest memory; GetSize() is scaled by sizeof(float) to
+        // obtain the byte count.
+        const VAddr addr = gpu.memory_manager->PhysicalToVirtualAddress(buffer.address);
+        std::vector<u8> data(used_buffer.GetSize() * sizeof(float));
+        Memory::ReadBlock(addr, data.data(), data.size());
+
+        glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo);
+        glBufferData(GL_SHADER_STORAGE_BUFFER, static_cast<GLsizeiptr>(data.size()), data.data(),
+                     GL_DYNAMIC_DRAW);
+        glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
+
+        // Now configure the bindpoint of the buffer inside the shader
+        const std::string buffer_name = used_buffer.GetName();
+        const GLuint index =
+            glGetProgramResourceIndex(program, GL_SHADER_STORAGE_BLOCK, buffer_name.c_str());
+        // glGetProgramResourceIndex reports an unknown name as GL_INVALID_INDEX (an unsigned
+        // value), so compare against that constant rather than a signed -1.
+        if (index != GL_INVALID_INDEX) {
+            glShaderStorageBlockBinding(program, index, buffer_draw_state.bindpoint);
+        }
     }
- state.draw.shader_program = test_shader.shader.handle;
     state.Apply();
- for (u32 texture = 0; texture < texture_samplers.size(); ++texture) {
- // Set the texture samplers to correspond to different texture units
- std::string uniform_name = "tex[" + std::to_string(texture) + "]";
- GLint uniform_tex = glGetUniformLocation(test_shader.shader.handle, uniform_name.c_str());
- if (uniform_tex != -1) {
- glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id);
- }
- }
-
- if (has_ARB_separate_shader_objects) {
- state.draw.shader_program = 0;
- state.Apply();
- }
+    return static_cast<u32>(current_bindpoint + entries.size());
 }
void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d868bf421..03e02b52a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -6,19 +6,16 @@
#include <array>
#include <cstddef>
-#include <cstring>
#include <memory>
-#include <unordered_map>
#include <vector>
#include <glad/glad.h>
-#include "common/bit_field.h"
#include "common/common_types.h"
-#include "common/hash.h"
-#include "common/vector_math.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
@@ -30,7 +27,7 @@ public:
~RasterizerOpenGL() override;
void DrawArrays() override;
- void NotifyMaxwellRegisterChanged(u32 id) override;
+ void NotifyMaxwellRegisterChanged(u32 method) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
@@ -45,7 +42,7 @@ public:
/// OpenGL shader generated for a given Maxwell register state
struct MaxwellShader {
/// OpenGL shader resource
- OGLShader shader;
+ OGLProgram shader;
};
struct VertexShader {
@@ -56,34 +53,6 @@ public:
OGLShader shader;
};
- /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
- // NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at
- // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
- // Not following that rule will cause problems on some AMD drivers.
- struct UniformData {};
-
- // static_assert(
- // sizeof(UniformData) == 0x460,
- // "The size of the UniformData structure has changed, update the structure in the shader");
- static_assert(sizeof(UniformData) < 16384,
- "UniformData structure must be less than 16kb as per the OpenGL spec");
-
- struct VSUniformData {};
- // static_assert(
- // sizeof(VSUniformData) == 1856,
- // "The size of the VSUniformData structure has changed, update the structure in the
- // shader");
- static_assert(sizeof(VSUniformData) < 16384,
- "VSUniformData structure must be less than 16kb as per the OpenGL spec");
-
- struct FSUniformData {};
- // static_assert(
- // sizeof(FSUniformData) == 1856,
- // "The size of the FSUniformData structure has changed, update the structure in the
- // shader");
- static_assert(sizeof(FSUniformData) < 16384,
- "FSUniformData structure must be less than 16kb as per the OpenGL spec");
-
private:
class SamplerInfo {
public:
@@ -113,6 +82,18 @@ private:
/// Binds the required textures to OpenGL before drawing a batch.
void BindTextures();
+ /*
+ * Configures the current constbuffers to use for the draw command.
+ * @param stage The shader stage to configure buffers for.
+ * @param program The OpenGL program object that contains the specified stage.
+ * @param current_bindpoint The offset at which to start counting new buffer bindpoints.
+ * @param entries Vector describing the buffers that are actually used in the guest shader.
+ * @returns The next available bindpoint for use in the next shader stage.
+ */
+ u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program,
+ u32 current_bindpoint,
+ const std::vector<GLShader::ConstBufferEntry>& entries);
+
/// Syncs the viewport to match the guest state
void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale);
@@ -122,9 +103,6 @@ private:
/// Syncs the clip coefficients to match the guest state
void SyncClipCoef();
- /// Sets the OpenGL shader in accordance with the current guest state
- void SetShader();
-
/// Syncs the cull mode to match the guest state
void SyncCullMode();
@@ -152,23 +130,16 @@ private:
RasterizerCacheOpenGL res_cache;
- /// Shader used for test renderering - to be removed once we have emulated shaders
- MaxwellShader test_shader{};
-
- const MaxwellShader* current_shader{};
- bool shader_dirty{};
-
- struct {
- UniformData data;
- bool dirty;
- } uniform_block_data = {};
-
- OGLPipeline pipeline;
+ std::unique_ptr<GLShader::ProgramManager> shader_program_manager;
OGLVertexArray sw_vao;
OGLVertexArray hw_vao;
std::array<bool, 16> hw_vao_enabled_attributes;
- std::array<SamplerInfo, 32> texture_samplers;
+ std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
+ std::array<std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers>,
+ Tegra::Engines::Maxwell3D::Regs::MaxShaderStage>
+ ssbos;
+
static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
std::unique_ptr<OGLStreamBuffer> vertex_buffer;
OGLBuffer uniform_buffer;
@@ -179,22 +150,11 @@ private:
GLsizeiptr vs_input_size;
- void AnalyzeVertexArray(bool is_indexed);
void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset);
- OGLBuffer vs_uniform_buffer;
- std::unordered_map<GLShader::MaxwellVSConfig, VertexShader*> vs_shader_map;
- std::unordered_map<std::string, VertexShader> vs_shader_cache;
- OGLShader vs_default_shader;
-
- void SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset);
-
- OGLBuffer fs_uniform_buffer;
- std::unordered_map<GLShader::MaxwellFSConfig, FragmentShader*> fs_shader_map;
- std::unordered_map<std::string, FragmentShader> fs_shader_cache;
- OGLShader fs_default_shader;
+ std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers;
- void SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset);
+ void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos);
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 5cbafa2e7..ced2b8247 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -7,7 +7,6 @@
#include <cstring>
#include <iterator>
#include <memory>
-#include <unordered_set>
#include <utility>
#include <vector>
#include <boost/optional.hpp>
@@ -20,7 +19,6 @@
#include "common/math_util.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
-#include "common/vector_math.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/hle/kernel/process.h"
@@ -36,6 +34,7 @@
using SurfaceType = SurfaceParams::SurfaceType;
using PixelFormat = SurfaceParams::PixelFormat;
+using ComponentType = SurfaceParams::ComponentType;
struct FormatTuple {
GLint internal_format;
@@ -47,26 +46,24 @@ struct FormatTuple {
u32 compression_factor;
};
-static constexpr std::array<FormatTuple, 1> fb_format_tuples = {{
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8
+static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false, 1}, // ABGR8
+ {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false, 1}, // B5G6R5
+ {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1
+ {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT23
+ {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT45
}};
-static constexpr std::array<FormatTuple, 2> tex_format_tuples = {{
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8
- {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1
-}};
-
-static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
+static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
- if (type == SurfaceType::Color) {
- ASSERT(static_cast<size_t>(pixel_format) < fb_format_tuples.size());
- return fb_format_tuples[static_cast<unsigned int>(pixel_format)];
+ if (type == SurfaceType::ColorTexture) {
+ ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
+ // For now only UNORM components are supported
+ ASSERT(component_type == ComponentType::UNorm);
+ return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
} else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
// TODO(Subv): Implement depth formats
ASSERT_MSG(false, "Unimplemented");
- } else if (type == SurfaceType::Texture) {
- ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
- return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
}
UNREACHABLE();
@@ -85,56 +82,42 @@ static u16 GetResolutionScaleFactor() {
}
template <bool morton_to_gl, PixelFormat format>
-static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) {
- constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
- constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
- for (u32 y = 0; y < 8; ++y) {
- for (u32 x = 0; x < 8; ++x) {
- u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel;
- u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel;
- if (morton_to_gl) {
- std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel);
- } else {
- std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel);
- }
- }
- }
-}
-
-template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) {
+void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr base, VAddr start,
+ VAddr end) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
- // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
- // configuration for this and perform more generic un/swizzle
- LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
- VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
- Memory::GetPointer(base), gl_buffer, morton_to_gl);
-}
-
-template <>
-void MortonCopy<true, PixelFormat::DXT1>(u32 stride, u32 height, u8* gl_buffer, VAddr base,
- VAddr start, VAddr end) {
- constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::DXT1) / 8;
- constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::DXT1);
-
- // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
- // configuration for this and perform more generic un/swizzle
- LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
- auto data =
- Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::DXT1, stride, height);
- std::memcpy(gl_buffer, data.data(), data.size());
+ if (morton_to_gl) {
+ auto data = Tegra::Texture::UnswizzleTexture(
+ base, SurfaceParams::TextureFormatFromPixelFormat(format), stride, height,
+ block_height);
+ std::memcpy(gl_buffer, data.data(), data.size());
+ } else {
+ // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
+ // the configuration for this and perform more generic un/swizzle
+ LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
+ VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
+ Memory::GetPointer(base), gl_buffer, morton_to_gl);
+ }
}
-static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> morton_to_gl_fns = {
- MortonCopy<true, PixelFormat::RGBA8>,
- MortonCopy<true, PixelFormat::DXT1>,
+static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
+ SurfaceParams::MaxPixelFormat>
+ morton_to_gl_fns = {
+ MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
+ MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
+ MortonCopy<true, PixelFormat::DXT45>,
};
-static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> gl_to_morton_fns = {
- MortonCopy<false, PixelFormat::RGBA8>,
- MortonCopy<false, PixelFormat::DXT1>,
+static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
+ SurfaceParams::MaxPixelFormat>
+ gl_to_morton_fns = {
+ MortonCopy<false, PixelFormat::ABGR8>,
+ MortonCopy<false, PixelFormat::B5G6R5>,
+ // TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported
+ nullptr,
+ nullptr,
+ nullptr,
};
// Allocate an uninitialized texture of appropriate size and format for the surface
@@ -183,7 +166,7 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec
u32 buffers = 0;
- if (type == SurfaceType::Color || type == SurfaceType::Texture) {
+ if (type == SurfaceType::ColorTexture) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
@@ -311,15 +294,18 @@ MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& su
bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const {
return std::tie(other_surface.addr, other_surface.width, other_surface.height,
- other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) ==
- std::tie(addr, width, height, stride, pixel_format, is_tiled) &&
+ other_surface.stride, other_surface.block_height, other_surface.pixel_format,
+ other_surface.component_type,
+ other_surface.is_tiled) == std::tie(addr, width, height, stride, block_height,
+ pixel_format, component_type, is_tiled) &&
pixel_format != PixelFormat::Invalid;
}
bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
return sub_surface.addr >= addr && sub_surface.end <= end &&
sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid &&
- sub_surface.is_tiled == is_tiled &&
+ sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height &&
+ sub_surface.component_type == component_type &&
(sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
(sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) &&
GetSubRect(sub_surface).left + sub_surface.width <= stride;
@@ -328,7 +314,8 @@ bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const {
return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format &&
addr <= expanded_surface.end && expanded_surface.addr <= end &&
- is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride &&
+ is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height &&
+ component_type == expanded_surface.component_type && stride == expanded_surface.stride &&
(std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) %
BytesInPixels(stride * (is_tiled ? 8 : 1)) ==
0;
@@ -339,6 +326,10 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const {
end < texcopy_params.end) {
return false;
}
+ if (texcopy_params.block_height != block_height ||
+ texcopy_params.component_type != component_type)
+ return false;
+
if (texcopy_params.width != texcopy_params.stride) {
const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1)));
return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
@@ -481,18 +472,13 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) {
const u64 start_offset = load_start - addr;
if (!is_tiled) {
- ASSERT(type == SurfaceType::Color);
const u32 bytes_per_pixel{GetFormatBpp() >> 3};
- // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
- // the configuration for this and perform more generic un/swizzle
- LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
- VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4,
- texture_src_data + start_offset, &gl_buffer[start_offset],
- true);
+ std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset,
+ bytes_per_pixel * width * height);
} else {
- morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
- load_start, load_end);
+ morton_to_gl_fns[static_cast<size_t>(pixel_format)](
+ stride, block_height, height, &gl_buffer[0], addr, load_start, load_end);
}
}
@@ -533,11 +519,10 @@ void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) {
if (backup_bytes)
std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes);
} else if (!is_tiled) {
- ASSERT(type == SurfaceType::Color);
std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start);
} else {
- gl_to_morton_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
- flush_start, flush_end);
+ gl_to_morton_fns[static_cast<size_t>(pixel_format)](
+ stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end);
}
}
@@ -556,7 +541,7 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint
GLint y0 = static_cast<GLint>(rect.bottom);
size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format);
- const FormatTuple& tuple = GetFormatTuple(pixel_format);
+ const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type);
GLuint target_tex = texture.handle;
// If not 1x scale, create 1x texture that we will blit from to replace texture subrect in
@@ -629,7 +614,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui
OpenGLState prev_state = state;
SCOPE_EXIT({ prev_state.Apply(); });
- const FormatTuple& tuple = GetFormatTuple(pixel_format);
+ const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type);
// Ensure no bad interactions with GL_PACK_ALIGNMENT
ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0);
@@ -662,7 +647,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui
state.draw.read_framebuffer = read_fb_handle;
state.Apply();
- if (type == SurfaceType::Color || type == SurfaceType::Texture) {
+ if (type == SurfaceType::ColorTexture) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
texture.handle, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
@@ -685,7 +670,8 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
}
-enum MatchFlags {
+enum class MatchFlags {
+ None = 0,
Invalid = 1, // Flag that can be applied to other match types, invalid matches require
// validation before they can be used
Exact = 1 << 1, // Surfaces perfectly match
@@ -699,6 +685,10 @@ constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) {
return static_cast<MatchFlags>(static_cast<int>(lhs) | static_cast<int>(rhs));
}
+constexpr MatchFlags operator&(MatchFlags lhs, MatchFlags rhs) {
+ return static_cast<MatchFlags>(static_cast<int>(lhs) & static_cast<int>(rhs));
+}
+
/// Get the best surface match (and its match type) for the given flags
template <MatchFlags find_flags>
Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params,
@@ -716,15 +706,15 @@ Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params
: (params.res_scale <= surface->res_scale);
// validity will be checked in GetCopyableInterval
bool is_valid =
- find_flags & MatchFlags::Copy
+ (find_flags & MatchFlags::Copy) != MatchFlags::None
? true
: surface->IsRegionValid(validate_interval.value_or(params.GetInterval()));
- if (!(find_flags & MatchFlags::Invalid) && !is_valid)
+ if ((find_flags & MatchFlags::Invalid) == MatchFlags::None && !is_valid)
continue;
auto IsMatch_Helper = [&](auto check_type, auto match_fn) {
- if (!(find_flags & check_type))
+ if ((find_flags & check_type) == MatchFlags::None)
return;
bool matched;
@@ -818,7 +808,7 @@ void main() {
color = texelFetch(tbo, tbo_offset).rabg;
}
)";
- d24s8_abgr_shader.Create(vs_source, nullptr, fs_source);
+ d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source);
OpenGLState state = OpenGLState::GetCurState();
GLuint old_program = state.draw.shader_program;
@@ -1041,9 +1031,25 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
params.height = config.tic.Height();
params.is_tiled = config.tic.IsTiled();
params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format);
+
+ // TODO(Subv): Different types per component are not supported.
+ ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() &&
+ config.tic.r_type.Value() == config.tic.b_type.Value() &&
+ config.tic.r_type.Value() == config.tic.a_type.Value());
+
+ params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value());
+
+ if (config.tic.IsTiled()) {
+ params.block_height = config.tic.BlockHeight();
+ } else {
+ // Use the texture-provided stride value if the texture isn't tiled.
+ params.stride = params.PixelsInBytes(config.tic.Pitch());
+ }
+
params.UpdateParams();
- if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0) {
+ if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0 ||
+ params.stride != params.width) {
Surface src_surface;
MathUtil::Rectangle<u32> rect;
std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true);
@@ -1083,10 +1089,10 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
}
MathUtil::Rectangle<u32> viewport_clamped{
- static_cast<u32>(MathUtil::Clamp(viewport.left, 0, static_cast<s32>(config.width))),
- static_cast<u32>(MathUtil::Clamp(viewport.top, 0, static_cast<s32>(config.height))),
- static_cast<u32>(MathUtil::Clamp(viewport.right, 0, static_cast<s32>(config.width))),
- static_cast<u32>(MathUtil::Clamp(viewport.bottom, 0, static_cast<s32>(config.height)))};
+ static_cast<u32>(std::clamp(viewport.left, 0, static_cast<s32>(config.width))),
+ static_cast<u32>(std::clamp(viewport.top, 0, static_cast<s32>(config.height))),
+ static_cast<u32>(std::clamp(viewport.right, 0, static_cast<s32>(config.width))),
+ static_cast<u32>(std::clamp(viewport.bottom, 0, static_cast<s32>(config.height)))};
// get color and depth surfaces
SurfaceParams color_params;
@@ -1094,10 +1100,13 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
color_params.res_scale = resolution_scale_factor;
color_params.width = config.width;
color_params.height = config.height;
+ // TODO(Subv): Can framebuffers use a different block height?
+ color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
SurfaceParams depth_params = color_params;
color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address());
color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format);
+ color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format);
color_params.UpdateParams();
ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented");
@@ -1293,7 +1302,6 @@ void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface
const SurfaceInterval invalid_interval(addr, addr + size);
if (region_owner != nullptr) {
- ASSERT(region_owner->type != SurfaceType::Texture);
ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end);
// Surfaces can't have a gap
ASSERT(region_owner->width == region_owner->stride);
@@ -1355,7 +1363,8 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) {
surface->gl_buffer_size = 0;
surface->invalid_regions.insert(surface->GetInterval());
- AllocateSurfaceTexture(surface->texture.handle, GetFormatTuple(surface->pixel_format),
+ AllocateSurfaceTexture(surface->texture.handle,
+ GetFormatTuple(surface->pixel_format, surface->component_type),
surface->GetScaledWidth(), surface->GetScaledHeight());
return surface;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 06524fc59..6861efe16 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -52,27 +52,45 @@ enum class ScaleMatch {
struct SurfaceParams {
enum class PixelFormat {
- RGBA8 = 0,
- DXT1 = 1,
+ ABGR8 = 0,
+ B5G6R5 = 1,
+ DXT1 = 2,
+ DXT23 = 3,
+ DXT45 = 4,
+
+ Max,
Invalid = 255,
};
+ static constexpr size_t MaxPixelFormat = static_cast<size_t>(PixelFormat::Max);
+
+ enum class ComponentType {
+ Invalid = 0,
+ SNorm = 1,
+ UNorm = 2,
+ SInt = 3,
+ UInt = 4,
+ Float = 5,
+ };
+
enum class SurfaceType {
- Color = 0,
- Texture = 1,
- Depth = 2,
- DepthStencil = 3,
- Fill = 4,
- Invalid = 5
+ ColorTexture = 0,
+ Depth = 1,
+ DepthStencil = 2,
+ Fill = 3,
+ Invalid = 4,
};
static constexpr unsigned int GetFormatBpp(PixelFormat format) {
if (format == PixelFormat::Invalid)
return 0;
- constexpr std::array<unsigned int, 2> bpp_table = {
- 32, // RGBA8
- 64, // DXT1
+ constexpr std::array<unsigned int, MaxPixelFormat> bpp_table = {
+ 32, // ABGR8
+ 16, // B5G6R5
+ 64, // DXT1
+ 128, // DXT23
+ 128, // DXT45
};
ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -85,8 +103,9 @@ struct SurfaceParams {
static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
switch (format) {
case Tegra::RenderTargetFormat::RGBA8_UNORM:
- return PixelFormat::RGBA8;
+ return PixelFormat::ABGR8;
default:
+ NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
@@ -94,8 +113,9 @@ struct SurfaceParams {
static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
- return PixelFormat::RGBA8;
+ return PixelFormat::ABGR8;
default:
+ NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
@@ -104,10 +124,69 @@ struct SurfaceParams {
// TODO(Subv): Properly implement this
switch (format) {
case Tegra::Texture::TextureFormat::A8R8G8B8:
- return PixelFormat::RGBA8;
+ return PixelFormat::ABGR8;
+ case Tegra::Texture::TextureFormat::B5G6R5:
+ return PixelFormat::B5G6R5;
case Tegra::Texture::TextureFormat::DXT1:
return PixelFormat::DXT1;
+ case Tegra::Texture::TextureFormat::DXT23:
+ return PixelFormat::DXT23;
+ case Tegra::Texture::TextureFormat::DXT45:
+ return PixelFormat::DXT45;
+ default:
+ NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+ UNREACHABLE();
+ }
+ }
+
+ static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) {
+ // TODO(Subv): Properly implement this
+ switch (format) {
+ case PixelFormat::ABGR8:
+ return Tegra::Texture::TextureFormat::A8R8G8B8;
+ case PixelFormat::B5G6R5:
+ return Tegra::Texture::TextureFormat::B5G6R5;
+ case PixelFormat::DXT1:
+ return Tegra::Texture::TextureFormat::DXT1;
+ case PixelFormat::DXT23:
+ return Tegra::Texture::TextureFormat::DXT23;
+ case PixelFormat::DXT45:
+ return Tegra::Texture::TextureFormat::DXT45;
+ default:
+ UNREACHABLE();
+ }
+ }
+
+ static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
+ // TODO(Subv): Implement more component types
+ switch (type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return ComponentType::UNorm;
default:
+ NGLOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type));
+ UNREACHABLE();
+ }
+ }
+
+ static ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) {
+ // TODO(Subv): Implement more render targets
+ switch (format) {
+ case Tegra::RenderTargetFormat::RGBA8_UNORM:
+ case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
+ return ComponentType::UNorm;
+ default:
+ NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+ UNREACHABLE();
+ }
+ }
+
+ static ComponentType ComponentTypeFromGPUPixelFormat(
+ Tegra::FramebufferConfig::PixelFormat format) {
+ switch (format) {
+ case Tegra::FramebufferConfig::PixelFormat::ABGR8:
+ return ComponentType::UNorm;
+ default:
+ NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
@@ -116,8 +195,7 @@ struct SurfaceParams {
SurfaceType a_type = GetFormatType(pixel_format_a);
SurfaceType b_type = GetFormatType(pixel_format_b);
- if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) &&
- (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
+ if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) {
return true;
}
@@ -133,12 +211,8 @@ struct SurfaceParams {
}
static SurfaceType GetFormatType(PixelFormat pixel_format) {
- if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::RGBA8)) {
- return SurfaceType::Color;
- }
-
- if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::DXT1)) {
- return SurfaceType::Texture;
+ if (static_cast<size_t>(pixel_format) < MaxPixelFormat) {
+ return SurfaceType::ColorTexture;
}
// TODO(Subv): Implement the other formats
@@ -210,11 +284,13 @@ struct SurfaceParams {
u32 width = 0;
u32 height = 0;
u32 stride = 0;
+ u32 block_height = 0;
u16 res_scale = 1;
bool is_tiled = false;
PixelFormat pixel_format = PixelFormat::Invalid;
SurfaceType type = SurfaceType::Invalid;
+ ComponentType component_type = ComponentType::Invalid;
};
struct CachedSurface : SurfaceParams {
@@ -334,7 +410,7 @@ private:
OGLVertexArray attributeless_vao;
OGLBuffer d24s8_abgr_buffer;
GLsizeiptr d24s8_abgr_buffer_size;
- OGLShader d24s8_abgr_shader;
+ OGLProgram d24s8_abgr_shader;
GLint d24s8_abgr_tbo_size_u_id;
GLint d24s8_abgr_viewport_u_id;
};
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 7da5e74d1..93f9172e7 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -13,14 +13,16 @@
class OGLTexture : private NonCopyable {
public:
OGLTexture() = default;
- OGLTexture(OGLTexture&& o) {
- std::swap(handle, o.handle);
- }
+
+ OGLTexture(OGLTexture&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
+
~OGLTexture() {
Release();
}
- OGLTexture& operator=(OGLTexture&& o) {
- std::swap(handle, o.handle);
+
+ OGLTexture& operator=(OGLTexture&& o) noexcept {
+ Release();
+ handle = std::exchange(o.handle, 0);
return *this;
}
@@ -46,14 +48,16 @@ public:
class OGLSampler : private NonCopyable {
public:
OGLSampler() = default;
- OGLSampler(OGLSampler&& o) {
- std::swap(handle, o.handle);
- }
+
+ OGLSampler(OGLSampler&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
+
~OGLSampler() {
Release();
}
- OGLSampler& operator=(OGLSampler&& o) {
- std::swap(handle, o.handle);
+
+ OGLSampler& operator=(OGLSampler&& o) noexcept {
+ Release();
+ handle = std::exchange(o.handle, 0);
return *this;
}
@@ -79,25 +83,71 @@ public:
class OGLShader : private NonCopyable {
public:
OGLShader() = default;
- OGLShader(OGLShader&& o) {
- std::swap(handle, o.handle);
- }
+
+ OGLShader(OGLShader&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
+
~OGLShader() {
Release();
}
- OGLShader& operator=(OGLShader&& o) {
- std::swap(handle, o.handle);
+
+ OGLShader& operator=(OGLShader&& o) noexcept {
+ Release();
+ handle = std::exchange(o.handle, 0);
return *this;
}
- /// Creates a new internal OpenGL resource and stores the handle
- void Create(const char* vert_shader, const char* geo_shader, const char* frag_shader,
- const std::vector<const char*>& feedback_vars = {},
- bool separable_program = false) {
+ void Create(const char* source, GLenum type) {
if (handle != 0)
return;
- handle = GLShader::LoadProgram(vert_shader, geo_shader, frag_shader, feedback_vars,
- separable_program);
+ if (source == nullptr)
+ return;
+ handle = GLShader::LoadShader(source, type);
+ }
+
+ void Release() {
+ if (handle == 0)
+ return;
+ glDeleteShader(handle);
+ handle = 0;
+ }
+
+ GLuint handle = 0;
+};
+
+class OGLProgram : private NonCopyable {
+public:
+ OGLProgram() = default;
+
+ OGLProgram(OGLProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
+
+ ~OGLProgram() {
+ Release();
+ }
+
+ OGLProgram& operator=(OGLProgram&& o) noexcept {
+ Release();
+ handle = std::exchange(o.handle, 0);
+ return *this;
+ }
+
+ template <typename... T>
+ void Create(bool separable_program, T... shaders) {
+ if (handle != 0)
+ return;
+ handle = GLShader::LoadProgram(separable_program, shaders...);
+ }
+
+ /// Creates a new internal OpenGL resource and stores the handle
+ void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
+ bool separable_program = false) {
+ OGLShader vert, geo, frag;
+ if (vert_shader)
+ vert.Create(vert_shader, GL_VERTEX_SHADER);
+ if (geo_shader)
+ geo.Create(geo_shader, GL_GEOMETRY_SHADER);
+ if (frag_shader)
+ frag.Create(frag_shader, GL_FRAGMENT_SHADER);
+ Create(separable_program, vert.handle, geo.handle, frag.handle);
}
/// Deletes the internal OpenGL resource
@@ -115,13 +165,12 @@ public:
class OGLPipeline : private NonCopyable {
public:
OGLPipeline() = default;
- OGLPipeline(OGLPipeline&& o) {
- handle = std::exchange<GLuint>(o.handle, 0);
- }
+ OGLPipeline(OGLPipeline&& o) noexcept : handle{std::exchange<GLuint>(o.handle, 0)} {}
+
~OGLPipeline() {
Release();
}
- OGLPipeline& operator=(OGLPipeline&& o) {
+ OGLPipeline& operator=(OGLPipeline&& o) noexcept {
handle = std::exchange<GLuint>(o.handle, 0);
return *this;
}
@@ -148,14 +197,16 @@ public:
class OGLBuffer : private NonCopyable {
public:
OGLBuffer() = default;
- OGLBuffer(OGLBuffer&& o) {
- std::swap(handle, o.handle);
- }
+
+ OGLBuffer(OGLBuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
+
~OGLBuffer() {
Release();
}
- OGLBuffer& operator=(OGLBuffer&& o) {
- std::swap(handle, o.handle);
+
+ OGLBuffer& operator=(OGLBuffer&& o) noexcept {
+ Release();
+ handle = std::exchange(o.handle, 0);
return *this;
}
@@ -182,12 +233,12 @@ class OGLSync : private NonCopyable {
public:
OGLSync() = default;
- OGLSync(OGLSync&& o) : handle(std::exchange(o.handle, nullptr)) {}
+ OGLSync(OGLSync&& o) noexcept : handle(std::exchange(o.handle, nullptr)) {}
~OGLSync() {
Release();
}
- OGLSync& operator=(OGLSync&& o) {
+ OGLSync& operator=(OGLSync&& o) noexcept {
Release();
handle = std::exchange(o.handle, nullptr);
return *this;
@@ -214,14 +265,16 @@ public:
class OGLVertexArray : private NonCopyable {
public:
OGLVertexArray() = default;
- OGLVertexArray(OGLVertexArray&& o) {
- std::swap(handle, o.handle);
- }
+
+ OGLVertexArray(OGLVertexArray&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
+
~OGLVertexArray() {
Release();
}
- OGLVertexArray& operator=(OGLVertexArray&& o) {
- std::swap(handle, o.handle);
+
+ OGLVertexArray& operator=(OGLVertexArray&& o) noexcept {
+ Release();
+ handle = std::exchange(o.handle, 0);
return *this;
}
@@ -247,14 +300,16 @@ public:
class OGLFramebuffer : private NonCopyable {
public:
OGLFramebuffer() = default;
- OGLFramebuffer(OGLFramebuffer&& o) {
- std::swap(handle, o.handle);
- }
+
+ OGLFramebuffer(OGLFramebuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
+
~OGLFramebuffer() {
Release();
}
- OGLFramebuffer& operator=(OGLFramebuffer&& o) {
- std::swap(handle, o.handle);
+
+ OGLFramebuffer& operator=(OGLFramebuffer&& o) noexcept {
+ Release();
+ handle = std::exchange(o.handle, 0);
return *this;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 564ea8f9e..086424395 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2,57 +2,778 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <map>
+#include <set>
#include <string>
-#include <queue>
+#include <string_view>
#include "common/assert.h"
#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-namespace Maxwell3D {
-namespace Shader {
+namespace GLShader {
namespace Decompiler {
+using Tegra::Shader::Attribute;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Register;
+using Tegra::Shader::Sampler;
+using Tegra::Shader::SubOp;
+using Tegra::Shader::Uniform;
+
constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
-class Impl {
+class DecompileFail : public std::runtime_error {
+public:
+ using std::runtime_error::runtime_error;
+};
+
+/// Describes the behaviour of the code path between a given entry point and a return point.
+enum class ExitMethod {
+ Undetermined, ///< Internal value. Only occur when analyzing JMP loop.
+ AlwaysReturn, ///< All code paths reach the return point.
+ Conditional, ///< Code path reaches the return point or an END instruction conditionally.
+ AlwaysEnd, ///< All code paths reach a END instruction.
+};
+
+/// A subroutine is a range of code referenced by a CALL, IF or LOOP instruction.
+struct Subroutine {
+ /// Generates a name suitable for GLSL source code.
+ std::string GetName() const {
+ return "sub_" + std::to_string(begin) + "_" + std::to_string(end);
+ }
+
+ u32 begin; ///< Entry point of the subroutine.
+ u32 end; ///< Return point of the subroutine.
+ ExitMethod exit_method; ///< Exit method of the subroutine.
+ std::set<u32> labels; ///< Addresses refereced by JMP instructions.
+
+ bool operator<(const Subroutine& rhs) const {
+ return std::tie(begin, end) < std::tie(rhs.begin, rhs.end);
+ }
+};
+
+/// Analyzes shader code and produces a set of subroutines.
+class ControlFlowAnalyzer {
public:
- Impl(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code,
- const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, u32 main_offset,
- const std::function<std::string(u32)>& inputreg_getter,
- const std::function<std::string(u32)>& outputreg_getter, bool sanitize_mul,
- const std::string& emit_cb, const std::string& setemit_cb)
- : program_code(program_code), swizzle_data(swizzle_data), main_offset(main_offset),
- inputreg_getter(inputreg_getter), outputreg_getter(outputreg_getter),
- sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {}
+ ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset)
+ : program_code(program_code) {
+
+ // Recursively finds all subroutines.
+ const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END);
+ if (program_main.exit_method != ExitMethod::AlwaysEnd)
+ throw DecompileFail("Program does not always end");
+ }
- std::string Decompile() {
- UNREACHABLE();
- return {};
+ std::set<Subroutine> GetSubroutines() {
+ return std::move(subroutines);
}
private:
- const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code;
- const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data;
- u32 main_offset;
- const std::function<std::string(u32)>& inputreg_getter;
- const std::function<std::string(u32)>& outputreg_getter;
- bool sanitize_mul;
- const std::string& emit_cb;
- const std::string& setemit_cb;
+ const ProgramCode& program_code;
+ std::set<Subroutine> subroutines;
+ std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
+
+ /// Adds and analyzes a new subroutine if it is not added yet.
+ const Subroutine& AddSubroutine(u32 begin, u32 end) {
+ auto iter = subroutines.find(Subroutine{begin, end});
+ if (iter != subroutines.end())
+ return *iter;
+
+ Subroutine subroutine{begin, end};
+ subroutine.exit_method = Scan(begin, end, subroutine.labels);
+ if (subroutine.exit_method == ExitMethod::Undetermined)
+ throw DecompileFail("Recursive function detected");
+ return *subroutines.insert(std::move(subroutine)).first;
+ }
+
+ /// Scans a range of code for labels and determines the exit method.
+ ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) {
+ auto [iter, inserted] =
+ exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
+ ExitMethod& exit_method = iter->second;
+ if (!inserted)
+ return exit_method;
+
+ for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) {
+ if (const auto opcode = OpCode::Decode({program_code[offset]})) {
+ switch (opcode->GetId()) {
+ case OpCode::Id::EXIT: {
+ return exit_method = ExitMethod::AlwaysEnd;
+ }
+ }
+ }
+ }
+ return exit_method = ExitMethod::AlwaysReturn;
+ }
+};
+
+class ShaderWriter {
+public:
+ void AddLine(std::string_view text) {
+ DEBUG_ASSERT(scope >= 0);
+ if (!text.empty()) {
+ AppendIndentation();
+ }
+ shader_source += text;
+ AddNewLine();
+ }
+
+ void AddLine(char character) {
+ DEBUG_ASSERT(scope >= 0);
+ AppendIndentation();
+ shader_source += character;
+ AddNewLine();
+ }
+
+ void AddNewLine() {
+ DEBUG_ASSERT(scope >= 0);
+ shader_source += '\n';
+ }
+
+ std::string GetResult() {
+ return std::move(shader_source);
+ }
+
+ int scope = 0;
+
+private:
+ void AppendIndentation() {
+ shader_source.append(static_cast<size_t>(scope) * 4, ' ');
+ }
+
+ std::string shader_source;
};
-std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code,
- const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data,
- u32 main_offset,
- const std::function<std::string(u32)>& inputreg_getter,
- const std::function<std::string(u32)>& outputreg_getter,
- bool sanitize_mul, const std::string& emit_cb,
- const std::string& setemit_cb) {
- Impl impl(program_code, swizzle_data, main_offset, inputreg_getter, outputreg_getter,
- sanitize_mul, emit_cb, setemit_cb);
- return impl.Decompile();
+class GLSLGenerator {
+public:
+ GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code,
+ u32 main_offset, Maxwell3D::Regs::ShaderStage stage)
+ : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
+ stage(stage) {
+
+ Generate();
+ }
+
+ std::string GetShaderCode() {
+ return declarations.GetResult() + shader.GetResult();
+ }
+
+ /// Returns entries in the shader that are useful for external functions
+ ShaderEntries GetEntries() const {
+ return {GetConstBuffersDeclarations()};
+ }
+
+private:
+ /// Gets the Subroutine object corresponding to the specified address.
+ const Subroutine& GetSubroutine(u32 begin, u32 end) const {
+ auto iter = subroutines.find(Subroutine{begin, end});
+ ASSERT(iter != subroutines.end());
+ return *iter;
+ }
+
+ /// Generates code representing an input attribute register.
+ std::string GetInputAttribute(Attribute::Index attribute) {
+ switch (attribute) {
+ case Attribute::Index::Position:
+ return "position";
+ default:
+ const u32 index{static_cast<u32>(attribute) -
+ static_cast<u32>(Attribute::Index::Attribute_0)};
+ if (attribute >= Attribute::Index::Attribute_0) {
+ declr_input_attribute.insert(attribute);
+ return "input_attribute_" + std::to_string(index);
+ }
+
+ NGLOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index);
+ UNREACHABLE();
+ }
+ }
+
+ /// Generates code representing an output attribute register.
+ std::string GetOutputAttribute(Attribute::Index attribute) {
+ switch (attribute) {
+ case Attribute::Index::Position:
+ return "position";
+ default:
+ const u32 index{static_cast<u32>(attribute) -
+ static_cast<u32>(Attribute::Index::Attribute_0)};
+ if (attribute >= Attribute::Index::Attribute_0) {
+ declr_output_attribute.insert(attribute);
+ return "output_attribute_" + std::to_string(index);
+ }
+
+ NGLOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index);
+ UNREACHABLE();
+ }
+ }
+
+ /// Generates code representing a 19-bit immediate value
+ static std::string GetImmediate19(const Instruction& instr) {
+ return std::to_string(instr.alu.GetImm20_19());
+ }
+
+ /// Generates code representing a 32-bit immediate value
+ static std::string GetImmediate32(const Instruction& instr) {
+ return std::to_string(instr.alu.GetImm20_32());
+ }
+
+ /// Generates code representing a temporary (GPR) register.
+ std::string GetRegister(const Register& reg, unsigned elem = 0) {
+ if (reg == Register::ZeroIndex)
+ return "0";
+ if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg < 4) {
+ // GPRs 0-3 are output color for the fragment shader
+ return std::string{"color."} + "rgba"[(reg + elem) & 3];
+ }
+
+ return *declr_register.insert("register_" + std::to_string(reg + elem)).first;
+ }
+
+ /// Generates code representing a uniform (C buffer) register.
+ std::string GetUniform(const Uniform& reg) {
+ declr_const_buffers[reg.index].MarkAsUsed(static_cast<unsigned>(reg.index),
+ static_cast<unsigned>(reg.offset), stage);
+ return 'c' + std::to_string(reg.index) + '[' + std::to_string(reg.offset) + ']';
+ }
+
+ /// Generates code representing a texture sampler.
+ std::string GetSampler(const Sampler& sampler) const {
+ // TODO(Subv): Support more than just texture sampler 0
+ ASSERT_MSG(sampler.index == Sampler::Index::Sampler_0, "unsupported");
+ const unsigned index{static_cast<unsigned>(sampler.index.Value()) -
+ static_cast<unsigned>(Sampler::Index::Sampler_0)};
+ return "tex[" + std::to_string(index) + "]";
+ }
+
+ /**
+ * Adds code that calls a subroutine.
+ * @param subroutine the subroutine to call.
+ */
+ void CallSubroutine(const Subroutine& subroutine) {
+ if (subroutine.exit_method == ExitMethod::AlwaysEnd) {
+ shader.AddLine(subroutine.GetName() + "();");
+ shader.AddLine("return true;");
+ } else if (subroutine.exit_method == ExitMethod::Conditional) {
+ shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }");
+ } else {
+ shader.AddLine(subroutine.GetName() + "();");
+ }
+ }
+
+ /**
+ * Writes code that does an assignment operation.
+ * @param reg the destination register code.
+ * @param value the code representing the value to assign.
+ */
+ void SetDest(u64 elem, const std::string& reg, const std::string& value,
+ u64 dest_num_components, u64 value_num_components, bool is_abs = false) {
+ std::string swizzle = ".";
+ swizzle += "xyzw"[elem];
+
+ std::string dest = reg + (dest_num_components != 1 ? swizzle : "");
+ std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : "");
+ src = is_abs ? "abs(" + src + ")" : src;
+
+ shader.AddLine(dest + " = " + src + ";");
+ }
+
+ /*
+ * Writes code that assigns a predicate boolean variable.
+ * @param pred The id of the predicate to write to.
+ * @param value The expression value to assign to the predicate.
+ */
+ void SetPredicate(u64 pred, const std::string& value) {
+ using Tegra::Shader::Pred;
+ // Can't assign to the constant predicate.
+ ASSERT(pred != static_cast<u64>(Pred::UnusedIndex));
+
+ std::string variable = 'p' + std::to_string(pred);
+ shader.AddLine(variable + " = " + value + ';');
+ declr_predicates.insert(std::move(variable));
+ }
+
+ /*
+ * Returns the condition to use in the 'if' for a predicated instruction.
+ * @param instr Instruction to generate the if condition for.
+ * @returns string containing the predicate condition.
+ */
+ std::string GetPredicateCondition(Instruction instr) const {
+ using Tegra::Shader::Pred;
+ ASSERT(instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex));
+
+ std::string variable =
+ 'p' + std::to_string(static_cast<u64>(instr.pred.pred_index.Value()));
+
+ if (instr.negate_pred) {
+ return "!(" + variable + ')';
+ }
+
+ return variable;
+ }
+
+ /*
+ * Tells whether the instruction at `offset` is a 'sched' instruction.
+ * Counted from main_offset, every fourth slot (starting with the first one) holds a sched
+ * instruction, which is followed by 3 regular instructions.
+ */
+ bool IsSchedInstruction(u32 offset) const {
+     constexpr size_t sched_period = 4;
+     const u32 relative_offset = offset - main_offset;
+     return relative_offset % sched_period == 0;
+ }
+
+ /**
+ * Compiles a single instruction from Tegra to GLSL.
+ * Sched instructions are skipped without emitting code. For every other instruction a GLSL
+ * comment holding its offset and mnemonic is emitted first, and predicated instructions are
+ * wrapped in an "if (<predicate>) { ... }" block.
+ * NOTE(review): for arithmetic operands the negate modifier is applied before abs(), so a set
+ * abs bit cancels the negation - confirm this matches the hardware modifier order.
+ * @param offset the offset of the Tegra shader instruction.
+ * @return the offset of the next instruction to execute. Usually it is the current offset
+ * + 1. If the current instruction always terminates the program, returns PROGRAM_END.
+ */
+ u32 CompileInstr(u32 offset) {
+ // Ignore sched instructions when generating code.
+ if (IsSchedInstruction(offset)) {
+ return offset + 1;
+ }
+
+ const Instruction instr = {program_code[offset]};
+ const auto opcode = OpCode::Decode(instr);
+
+ // Decoding failure
+ if (!opcode) {
+ NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {0:x}", instr.value);
+ UNREACHABLE();
+ }
+
+ shader.AddLine("// " + std::to_string(offset) + ": " + opcode->GetName()); // trace comment in the GLSL
+
+ using Tegra::Shader::Pred;
+ ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute,
+ "NeverExecute predicate not implemented");
+
+ if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { // open the predicate guard
+ shader.AddLine("if (" + GetPredicateCondition(instr) + ')');
+ shader.AddLine('{');
+ ++shader.scope;
+ }
+
+ switch (opcode->GetType()) {
+ case OpCode::Type::Arithmetic: {
+ std::string dest = GetRegister(instr.gpr0);
+ std::string op_a = instr.alu.negate_a ? "-" : "";
+ op_a += GetRegister(instr.gpr8);
+ if (instr.alu.abs_a) {
+ op_a = "abs(" + op_a + ")";
+ }
+
+ std::string op_b = instr.alu.negate_b ? "-" : "";
+
+ if (instr.is_b_imm) { // operand b is an immediate, a register or a uniform
+ op_b += GetImmediate19(instr);
+ } else {
+ if (instr.is_b_gpr) {
+ op_b += GetRegister(instr.gpr20);
+ } else {
+ op_b += GetUniform(instr.uniform);
+ }
+ }
+
+ if (instr.alu.abs_b) {
+ op_b = "abs(" + op_b + ")";
+ }
+
+ switch (opcode->GetId()) {
+ case OpCode::Id::FMUL_C:
+ case OpCode::Id::FMUL_R:
+ case OpCode::Id::FMUL_IMM: {
+ SetDest(0, dest, op_a + " * " + op_b, 1, 1, instr.alu.abs_d);
+ break;
+ }
+ case OpCode::Id::FMUL32_IMM: {
+ // fmul32i doesn't have abs or neg bits.
+ SetDest(0, dest, GetRegister(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1);
+ break;
+ }
+ case OpCode::Id::FADD_C:
+ case OpCode::Id::FADD_R:
+ case OpCode::Id::FADD_IMM: {
+ SetDest(0, dest, op_a + " + " + op_b, 1, 1, instr.alu.abs_d);
+ break;
+ }
+ case OpCode::Id::MUFU: {
+ switch (instr.sub_op) { // each sub op maps to one GLSL expression
+ case SubOp::Cos:
+ SetDest(0, dest, "cos(" + op_a + ")", 1, 1, instr.alu.abs_d);
+ break;
+ case SubOp::Sin:
+ SetDest(0, dest, "sin(" + op_a + ")", 1, 1, instr.alu.abs_d);
+ break;
+ case SubOp::Ex2:
+ SetDest(0, dest, "exp2(" + op_a + ")", 1, 1, instr.alu.abs_d);
+ break;
+ case SubOp::Lg2:
+ SetDest(0, dest, "log2(" + op_a + ")", 1, 1, instr.alu.abs_d);
+ break;
+ case SubOp::Rcp:
+ SetDest(0, dest, "1.0 / " + op_a, 1, 1, instr.alu.abs_d);
+ break;
+ case SubOp::Rsq:
+ SetDest(0, dest, "inversesqrt(" + op_a + ")", 1, 1, instr.alu.abs_d);
+ break;
+ case SubOp::Min:
+ SetDest(0, dest, "min(" + op_a + "," + op_b + ")", 1, 1, instr.alu.abs_d);
+ break;
+ default:
+ NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
+ static_cast<unsigned>(instr.sub_op.Value()));
+ UNREACHABLE();
+ }
+ break;
+ }
+ case OpCode::Id::RRO: {
+ NGLOG_DEBUG(HW_GPU, "Skipping RRO instruction"); // no GLSL is emitted for RRO
+ break;
+ }
+ default: {
+ NGLOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {}", opcode->GetName());
+ UNREACHABLE();
+ }
+ }
+ break;
+ }
+ case OpCode::Type::Ffma: {
+ std::string dest = GetRegister(instr.gpr0);
+ std::string op_a = GetRegister(instr.gpr8);
+ std::string op_b = instr.ffma.negate_b ? "-" : "";
+ std::string op_c = instr.ffma.negate_c ? "-" : "";
+
+ switch (opcode->GetId()) { // the variant decides where operands b and c come from
+ case OpCode::Id::FFMA_CR: {
+ op_b += GetUniform(instr.uniform);
+ op_c += GetRegister(instr.gpr39);
+ break;
+ }
+ case OpCode::Id::FFMA_RR: {
+ op_b += GetRegister(instr.gpr20);
+ op_c += GetRegister(instr.gpr39);
+ break;
+ }
+ case OpCode::Id::FFMA_RC: {
+ op_b += GetRegister(instr.gpr39);
+ op_c += GetUniform(instr.uniform);
+ break;
+ }
+ case OpCode::Id::FFMA_IMM: {
+ op_b += GetImmediate19(instr);
+ op_c += GetRegister(instr.gpr39);
+ break;
+ }
+ default: {
+ NGLOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {}", opcode->GetName());
+ UNREACHABLE();
+ }
+ }
+
+ SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1);
+ break;
+ }
+ case OpCode::Type::Memory: {
+ std::string gpr0 = GetRegister(instr.gpr0);
+ const Attribute::Index attribute = instr.attribute.fmt20.index;
+
+ switch (opcode->GetId()) {
+ case OpCode::Id::LD_A: {
+ ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
+ SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4);
+ break;
+ }
+ case OpCode::Id::ST_A: {
+ ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
+ SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1);
+ break;
+ }
+ case OpCode::Id::TEXS: {
+ ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
+ const std::string op_a = GetRegister(instr.gpr8);
+ const std::string op_b = GetRegister(instr.gpr20);
+ const std::string sampler = GetSampler(instr.sampler);
+ const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+ // Add an extra scope and declare the texture coords inside to prevent overwriting
+ // them in case they are used as outputs of the texs instruction.
+ shader.AddLine("{");
+ ++shader.scope;
+ shader.AddLine(coord);
+ const std::string texture = "texture(" + sampler + ", coords)";
+ for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) { // one write per component
+ SetDest(elem, GetRegister(instr.gpr0, elem), texture, 1, 4);
+ }
+ --shader.scope;
+ shader.AddLine("}");
+ break;
+ }
+ default: {
+ NGLOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName());
+ UNREACHABLE();
+ }
+ }
+ break;
+ }
+ case OpCode::Type::FloatPredicate: {
+ std::string op_a = instr.fsetp.neg_a ? "-" : "";
+ op_a += GetRegister(instr.gpr8);
+
+ if (instr.fsetp.abs_a) {
+ op_a = "abs(" + op_a + ')';
+ }
+
+ std::string op_b{};
+
+ if (instr.is_b_imm) {
+ if (instr.fsetp.neg_b) {
+ // Only the immediate version of fsetp has a neg_b bit.
+ op_b += '-';
+ }
+ op_b += '(' + GetImmediate19(instr) + ')';
+ } else {
+ if (instr.is_b_gpr) {
+ op_b += GetRegister(instr.gpr20);
+ } else {
+ op_b += GetUniform(instr.uniform);
+ }
+ }
+
+ if (instr.fsetp.abs_b) {
+ op_b = "abs(" + op_b + ')';
+ }
+
+ using Tegra::Shader::Pred;
+ ASSERT_MSG(instr.fsetp.pred0 == static_cast<u64>(Pred::UnusedIndex) &&
+ instr.fsetp.pred39 == static_cast<u64>(Pred::UnusedIndex),
+ "Compound predicates are not implemented");
+
+ // We can't use the constant predicate as destination.
+ ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+ using Tegra::Shader::PredCondition;
+ switch (instr.fsetp.cond) { // only LessThan and Equal are handled here
+ case PredCondition::LessThan:
+ SetPredicate(instr.fsetp.pred3, '(' + op_a + ") < (" + op_b + ')');
+ break;
+ case PredCondition::Equal:
+ SetPredicate(instr.fsetp.pred3, '(' + op_a + ") == (" + op_b + ')');
+ break;
+ default:
+ NGLOG_CRITICAL(HW_GPU, "Unhandled predicate condition: {} (a: {}, b: {})",
+ static_cast<unsigned>(instr.fsetp.cond.Value()), op_a, op_b);
+ UNREACHABLE();
+ }
+ break;
+ }
+ default: {
+ switch (opcode->GetId()) {
+ case OpCode::Id::EXIT: {
+ ASSERT_MSG(instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex),
+ "Predicated exits not implemented");
+ shader.AddLine("return true;");
+ offset = PROGRAM_END - 1; // the final `return offset + 1` then yields PROGRAM_END
+ break;
+ }
+ case OpCode::Id::KIL: {
+ shader.AddLine("discard;");
+ break;
+ }
+ case OpCode::Id::IPA: {
+ const auto& attribute = instr.attribute.fmt28;
+ std::string dest = GetRegister(instr.gpr0);
+ SetDest(attribute.element, dest, GetInputAttribute(attribute.index), 1, 4);
+ break;
+ }
+ default: {
+ NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
+ UNREACHABLE();
+ }
+ }
+
+ break;
+ }
+ }
+
+ // Close the predicate condition scope.
+ if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
+ --shader.scope;
+ shader.AddLine('}');
+ }
+
+ return offset + 1;
+ }
+
+ /**
+ * Compiles consecutive instructions from Tegra to GLSL.
+ * @param begin offset of the first instruction to compile.
+ * @param end offset at which compilation stops (exclusive). When begin > end, PROGRAM_END
+ * is used as the limit instead.
+ * @return offset of the next instruction to compile. PROGRAM_END if the program
+ * terminates.
+ */
+ u32 CompileRange(u32 begin, u32 end) {
+     const auto limit = begin > end ? PROGRAM_END : end;
+     u32 next_offset = begin;
+     while (next_offset < limit) {
+         next_offset = CompileInstr(next_offset);
+     }
+     return next_offset;
+ }
+
+ void Generate() {
+ /* Writes the complete GLSL body into `shader`: forward declarations of all subroutines,
+ * the exec_shader() entry point, one bool function per subroutine and, last of all (via
+ * GenerateDeclarations), the global declarations. First: forward-declare every subroutine. */
+ for (const auto& subroutine : subroutines) {
+ shader.AddLine("bool " + subroutine.GetName() + "();");
+ }
+ shader.AddNewLine();
+
+ // Add the main entry point; its body is produced by CallSubroutine for the main subroutine
+ shader.AddLine("bool exec_shader() {");
+ ++shader.scope;
+ CallSubroutine(GetSubroutine(main_offset, PROGRAM_END));
+ --shader.scope;
+ shader.AddLine("}\n");
+
+ // Add definitions for all subroutines
+ for (const auto& subroutine : subroutines) {
+ std::set<u32> labels = subroutine.labels; // local copy, it may grow below
+
+ shader.AddLine("bool " + subroutine.GetName() + "() {");
+ ++shader.scope;
+
+ if (labels.empty()) { // no labels: the body is compiled as one straight range
+ if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) {
+ shader.AddLine("return false;");
+ }
+ } else {
+ labels.insert(subroutine.begin); // the subroutine entry is dispatched like a label
+ shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;");
+ shader.AddLine("while (true) {"); // jmp_to picks the case that runs on each pass
+ ++shader.scope;
+
+ shader.AddLine("switch (jmp_to) {");
+
+ for (auto label : labels) {
+ shader.AddLine("case " + std::to_string(label) + "u: {");
+ ++shader.scope;
+
+ auto next_it = labels.lower_bound(label + 1); // first label after this one
+ u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
+
+ u32 compile_end = CompileRange(label, next_label);
+ if (compile_end > next_label && compile_end != PROGRAM_END) {
+ // This happens only when there is a label inside a IF/LOOP block
+ shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }");
+ labels.emplace(compile_end); // std::set insertion keeps the loop's iterator valid
+ }
+
+ --shader.scope;
+ shader.AddLine('}');
+ }
+
+ shader.AddLine("default: return false;");
+ shader.AddLine('}');
+
+ --shader.scope;
+ shader.AddLine('}');
+
+ shader.AddLine("return false;");
+ }
+
+ --shader.scope;
+ shader.AddLine("}\n");
+
+ DEBUG_ASSERT(shader.scope == 0); // every opened scope must have been closed again
+ }
+
+ GenerateDeclarations();
+ }
+
+ /// Returns copies of the constant buffer entries that report IsUsed(), in array order.
+ std::vector<ConstBufferEntry> GetConstBuffersDeclarations() const {
+     std::vector<ConstBufferEntry> used_entries;
+     for (const auto& entry : declr_const_buffers) {
+         if (entry.IsUsed()) {
+             used_entries.push_back(entry);
+         }
+     }
+     return used_entries;
+ }
+
+ /// Emits the global GLSL declarations collected while compiling into `declarations`:
+ /// registers, input/output attributes, used constant buffers and predicates.
+ void GenerateDeclarations() {
+     // Every referenced register becomes a zero-initialized float global.
+     for (const auto& reg : declr_register) {
+         declarations.AddLine("float " + reg + " = 0.0;");
+     }
+     declarations.AddNewLine();
+
+     // Attribute locations are numbered relative to Attribute_0.
+     const auto location_of = [](Attribute::Index index) {
+         return std::to_string(static_cast<u32>(index) -
+                               static_cast<u32>(Attribute::Index::Attribute_0));
+     };
+
+     for (const auto& index : declr_input_attribute) {
+         // TODO(bunnei): Use proper number of elements for these
+         declarations.AddLine("layout(location = " + location_of(index) + ") in vec4 " +
+                              GetInputAttribute(index) + ";");
+     }
+     declarations.AddNewLine();
+
+     for (const auto& index : declr_output_attribute) {
+         // TODO(bunnei): Use proper number of elements for these
+         declarations.AddLine("layout(location = " + location_of(index) + ") out vec4 " +
+                              GetOutputAttribute(index) + ";");
+     }
+     declarations.AddNewLine();
+
+     // One std430 buffer block per used constant buffer, exposing an unsized float array.
+     // (A counter that was incremented here but never read has been removed.)
+     for (const auto& entry : GetConstBuffersDeclarations()) {
+         declarations.AddLine("layout(std430) buffer " + entry.GetName());
+         declarations.AddLine('{');
+         declarations.AddLine(" float c" + std::to_string(entry.GetIndex()) + "[];");
+         declarations.AddLine("};");
+         declarations.AddNewLine();
+     }
+
+     declarations.AddNewLine();
+     // Predicates are bool globals that start out false.
+     for (const auto& pred : declr_predicates) {
+         declarations.AddLine("bool " + pred + " = false;");
+     }
+     declarations.AddNewLine();
+ }
+
+private:
+ const std::set<Subroutine>& subroutines;
+ const ProgramCode& program_code;
+ const u32 main_offset;
+ Maxwell3D::Regs::ShaderStage stage;
+
+ ShaderWriter shader;
+ ShaderWriter declarations;
+
+ // Declarations
+ std::set<std::string> declr_register;
+ std::set<std::string> declr_predicates;
+ std::set<Attribute::Index> declr_input_attribute;
+ std::set<Attribute::Index> declr_output_attribute;
+ std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
+}; // class GLSLGenerator
+
+/// Returns the GLSL forward declaration of the exec_shader() entry point.
+std::string GetCommonDeclarations() {
+    const std::string declaration{"bool exec_shader();"};
+    return declaration;
+}
+
+/// Decompiles `program_code` starting at `main_offset` into GLSL for the given stage.
+/// Returns boost::none (after logging the reason) when a DecompileFail is thrown.
+boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
+                                                Maxwell3D::Regs::ShaderStage stage) {
+    boost::optional<ProgramResult> result; // stays empty unless decompilation succeeds
+    try {
+        const auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines();
+        GLSLGenerator generator(subroutines, program_code, main_offset, stage);
+        result = ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
+    } catch (const DecompileFail& exception) {
+        NGLOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());
+    }
+    return result;
}
} // namespace Decompiler
-} // namespace Shader
-} // namespace Maxwell3D
+} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 02ebfcbe8..382c76b7a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -2,26 +2,25 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#pragma once
+
#include <array>
#include <functional>
#include <string>
+#include <boost/optional.hpp>
#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_shader_gen.h"
-namespace Maxwell3D {
-namespace Shader {
+namespace GLShader {
namespace Decompiler {
-constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x100000};
-constexpr size_t MAX_SWIZZLE_DATA_LENGTH{0x100000};
+using Tegra::Engines::Maxwell3D;
+
+std::string GetCommonDeclarations();
-std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code,
- const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data,
- u32 main_offset,
- const std::function<std::string(u32)>& inputreg_getter,
- const std::function<std::string(u32)>& outputreg_getter,
- bool sanitize_mul, const std::string& emit_cb = "",
- const std::string& setemit_cb = "");
+boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
+ Maxwell3D::Regs::ShaderStage stage);
} // namespace Decompiler
-} // namespace Shader
-} // namespace Maxwell3D
+} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 8f3c98800..254f6e2c3 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -3,18 +3,74 @@
// Refer to the license.txt file included.
#include "common/assert.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
namespace GLShader {
-std::string GenerateVertexShader(const MaxwellVSConfig& config) {
- UNREACHABLE();
- return {};
+using Tegra::Engines::Maxwell3D;
+
+static constexpr u32 PROGRAM_OFFSET{10};
+
+ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config) {
+    // Preamble: GLSL version, extension and the exec_shader() forward declaration.
+    std::string source = "#version 430 core\n";
+    source += "#extension GL_ARB_separate_shader_objects : enable\n\n";
+    source += Decompiler::GetCommonDeclarations();
+
+    // A default-constructed (empty) result is used when decompilation fails.
+    ProgramResult decompiled = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET,
+                                                            Maxwell3D::Regs::ShaderStage::Vertex)
+                                   .get_value_or({});
+
+    // Fixed wrapper: main() runs the decompiled code, then applies the viewport flip.
+    source += R"(
+
+out gl_PerVertex {
+ vec4 gl_Position;
+};
+
+out vec4 position;
+
+layout (std140) uniform vs_config {
+ vec4 viewport_flip;
+};
+
+void main() {
+ exec_shader();
+
+ // Viewport can be flipped, which is unsupported by glViewport
+ position.xy *= viewport_flip.xy;
+ gl_Position = position;
+}
+)";
+    source += decompiled.first;
+    return ProgramResult{source, decompiled.second};
+}
+
+ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config) {
+    // Preamble: GLSL version, extension and the exec_shader() forward declaration.
+    std::string source = "#version 430 core\n";
+    source += "#extension GL_ARB_separate_shader_objects : enable\n\n";
+    source += Decompiler::GetCommonDeclarations();
+
+    // A default-constructed (empty) result is used when decompilation fails.
+    ProgramResult decompiled = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET,
+                                                            Maxwell3D::Regs::ShaderStage::Fragment)
+                                   .get_value_or({});
+
+    // Fixed wrapper: inputs/outputs, the config block, the sampler array and main().
+    source += R"(
+
+in vec4 position;
+out vec4 color;
+
+layout (std140) uniform fs_config {
+ vec4 viewport_flip;
+};
+
+uniform sampler2D tex[32];
+
+void main() {
+ exec_shader();
}
-std::string GenerateFragmentShader(const MaxwellFSConfig& config) {
- UNREACHABLE();
- return {};
+)";
+    source += decompiled.first;
+    return ProgramResult{source, decompiled.second};
}
} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 5101e7d30..458032b5c 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -4,46 +4,113 @@
#pragma once
-#include <cstring>
+#include <array>
#include <string>
#include <type_traits>
+#include <utility>
+#include <vector>
+#include "common/common_types.h"
#include "common/hash.h"
namespace GLShader {
-enum Attributes {
- ATTRIBUTE_POSITION,
- ATTRIBUTE_COLOR,
- ATTRIBUTE_TEXCOORD0,
- ATTRIBUTE_TEXCOORD1,
- ATTRIBUTE_TEXCOORD2,
- ATTRIBUTE_TEXCOORD0_W,
- ATTRIBUTE_NORMQUAT,
- ATTRIBUTE_VIEW,
+constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
+
+using ProgramCode = std::array<u64, MAX_PROGRAM_CODE_LENGTH>;
+
+/// Describes one constant buffer referenced by a shader: whether it is used, its index, the
+/// shader stage it belongs to and the highest offset that was accessed.
+class ConstBufferEntry {
+    using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+public:
+    /// Records an access: marks the entry as used, stores index and stage, and widens the
+    /// tracked maximum offset if `offset` is larger than any seen so far.
+    void MarkAsUsed(unsigned index, unsigned offset, Maxwell::ShaderStage stage) {
+        is_used = true;
+        this->index = index;
+        this->stage = stage;
+        max_offset = std::max(max_offset, offset);
+    }
+
+    /// True once MarkAsUsed() has been called on this entry.
+    bool IsUsed() const {
+        return is_used;
+    }
+
+    /// Index passed to the most recent MarkAsUsed() call.
+    unsigned GetIndex() const {
+        return index;
+    }
+
+    /// Highest recorded offset plus one.
+    unsigned GetSize() const {
+        return max_offset + 1;
+    }
+
+    /// Stage-specific base name followed by the buffer index, e.g. "buffer_vs_c3".
+    std::string GetName() const {
+        return BufferBaseNames[static_cast<size_t>(stage)] + std::to_string(index);
+    }
+
+private:
+    static constexpr std::array<const char*, Maxwell::MaxShaderStage> BufferBaseNames = {
+        "buffer_vs_c", "buffer_tessc_c", "buffer_tesse_c", "buffer_gs_c", "buffer_fs_c",
+    };
+
+    bool is_used{};
+    unsigned index{};
+    unsigned max_offset{};
+    // Value-initialized so that GetName() on an entry that was never marked as used does not
+    // index BufferBaseNames with an indeterminate value.
+    Maxwell::ShaderStage stage{};
};
-struct MaxwellShaderConfigCommon {
- explicit MaxwellShaderConfigCommon(){};
+struct ShaderEntries { // resource information returned alongside generated shader source
+ std::vector<ConstBufferEntry> const_buffer_entries; // constant buffers the shader uses
};
-struct MaxwellVSConfig : MaxwellShaderConfigCommon {
- explicit MaxwellVSConfig() : MaxwellShaderConfigCommon() {}
+using ProgramResult = std::pair<std::string, ShaderEntries>;
- bool operator==(const MaxwellVSConfig& o) const {
- return std::memcmp(this, &o, sizeof(MaxwellVSConfig)) == 0;
- };
+/// Holds the raw program code of a shader together with a lazily computed hash of it.
+struct ShaderSetup {
+    ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {}
+
+    ProgramCode program_code;
+    bool program_code_hash_dirty = true;
+
+    /// Returns the hash of program_code, recomputing it only while the dirty flag is set.
+    u64 GetProgramCodeHash() {
+        if (!program_code_hash_dirty) {
+            return program_code_hash;
+        }
+        program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code));
+        program_code_hash_dirty = false;
+        return program_code_hash;
+    }
+
+private:
+    u64 program_code_hash{};
};
-struct MaxwellFSConfig : MaxwellShaderConfigCommon {
- explicit MaxwellFSConfig() : MaxwellShaderConfigCommon() {}
+struct MaxwellShaderConfigCommon { // state shared by the vertex and fragment shader configs
+ void Init(ShaderSetup& setup) { // takes a non-const setup because the hash is computed lazily
+ program_hash = setup.GetProgramCodeHash();
+ }
- bool operator==(const MaxwellFSConfig& o) const {
- return std::memcmp(this, &o, sizeof(MaxwellFSConfig)) == 0;
- };
+ u64 program_hash; // not initialized until Init() is called
};
-std::string GenerateVertexShader(const MaxwellVSConfig& config);
-std::string GenerateFragmentShader(const MaxwellFSConfig& config);
+struct MaxwellVSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> { // vertex shader key
+ explicit MaxwellVSConfig(ShaderSetup& setup) {
+ state.Init(setup); // records the program code hash of `setup`
+ }
+};
+
+struct MaxwellFSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> { // fragment shader key
+ explicit MaxwellFSConfig(ShaderSetup& setup) {
+ state.Init(setup); // records the program code hash of `setup`
+ }
+};
+
+/**
+ * Generates the GLSL vertex shader program source code for the given VS program
+ * @returns ProgramResult pair: the shader source code (first) and its shader entries (second)
+ */
+ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config);
+
+/**
+ * Generates the GLSL fragment shader program source code for the given FS program
+ * @returns ProgramResult pair: the shader source code (first) and its shader entries (second)
+ */
+ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config);
} // namespace GLShader
@@ -52,14 +119,14 @@ namespace std {
template <>
struct hash<GLShader::MaxwellVSConfig> {
size_t operator()(const GLShader::MaxwellVSConfig& k) const {
- return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellVSConfig));
+ return k.Hash(); // delegates to the config's own Hash() member
}
};
template <>
struct hash<GLShader::MaxwellFSConfig> {
size_t operator()(const GLShader::MaxwellFSConfig& k) const {
- return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellFSConfig));
+ return k.Hash(); // delegates to the config's own Hash() member
}
};
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
new file mode 100644
index 000000000..17b3925a0
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -0,0 +1,64 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/core.h"
+#include "core/hle/kernel/process.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+
+namespace GLShader {
+
+namespace Impl {
+/// Binds the uniform block called `name` in `shader` to the binding point given by `binding`,
+/// after checking that the block's data size equals `expected_size`. Does nothing when the
+/// program has no uniform block of that name.
+void SetShaderUniformBlockBinding(GLuint shader, const char* name,
+                                  Maxwell3D::Regs::ShaderStage binding, size_t expected_size) {
+    const GLuint block_index = glGetUniformBlockIndex(shader, name);
+    if (block_index == GL_INVALID_INDEX) {
+        return;
+    }
+
+    GLint ub_size = 0;
+    glGetActiveUniformBlockiv(shader, block_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
+    ASSERT_MSG(ub_size == expected_size,
+               "Uniform block size did not match! Got %d, expected %zu",
+               static_cast<int>(ub_size), expected_size);
+    glUniformBlockBinding(shader, block_index, static_cast<GLuint>(binding));
+}
+
+/// Binds the vs_config, gs_config and fs_config uniform blocks of `shader` (where present) to
+/// the binding points of their shader stages; each is expected to be sizeof(MaxwellUniformData).
+void SetShaderUniformBlockBindings(GLuint shader) {
+    struct ConfigBlock {
+        const char* name;
+        Maxwell3D::Regs::ShaderStage stage;
+    };
+    const ConfigBlock config_blocks[] = {
+        {"vs_config", Maxwell3D::Regs::ShaderStage::Vertex},
+        {"gs_config", Maxwell3D::Regs::ShaderStage::Geometry},
+        {"fs_config", Maxwell3D::Regs::ShaderStage::Fragment},
+    };
+
+    for (const ConfigBlock& block : config_blocks) {
+        SetShaderUniformBlockBinding(shader, block.name, block.stage,
+                                     sizeof(MaxwellUniformData));
+    }
+}
+
+/// Assigns every active "tex[i]" sampler uniform of `shader` to its Maxwell texture unit.
+void SetShaderSamplerBindings(GLuint shader) {
+    // Temporarily make `shader` the current program; glUniform1i acts on the current program.
+    OpenGLState state = OpenGLState::GetCurState();
+    const GLuint previous_program = std::exchange(state.draw.shader_program, shader);
+    state.Apply();
+
+    // Set the texture samplers to correspond to different texture units
+    for (u32 index = 0; index < NumTextureSamplers; ++index) {
+        const std::string sampler_name = "tex[" + std::to_string(index) + "]";
+        const GLint location = glGetUniformLocation(shader, sampler_name.c_str());
+        if (location == -1) {
+            continue; // no such uniform location in this program
+        }
+        glUniform1i(location, TextureUnits::MaxwellTexture(index).id);
+    }
+
+    // Restore the program that was current on entry.
+    state.draw.shader_program = previous_program;
+    state.Apply();
+}
+
+} // namespace Impl
+
+/// Fills the uniform data from the current Maxwell3D registers. `shader_stage` is not read.
+void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
+    // Use the accessor through the class: `Core::System().GetInstance()` built a temporary
+    // System object only to call GetInstance() on it.
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+
+    // TODO(bunnei): Support more than one viewport
+    // A negative viewport scale on an axis means that axis is flipped.
+    viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0 : 1.0;
+    viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0 : 1.0;
+}
+
+} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
new file mode 100644
index 000000000..e963b4b7e
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -0,0 +1,175 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <tuple>
+#include <unordered_map>
+#include <boost/functional/hash.hpp>
+#include <glad/glad.h>
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
+
+namespace GLShader {
+
+/// Number of OpenGL texture samplers that can be used in the fragment shader
+static constexpr size_t NumTextureSamplers = 32;
+
+using Tegra::Engines::Maxwell3D;
+
+namespace Impl {
+void SetShaderUniformBlockBindings(GLuint shader);
+void SetShaderSamplerBindings(GLuint shader);
+} // namespace Impl
+
+/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
+// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
+// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
+// Not following that rule will cause problems on some AMD drivers.
+struct MaxwellUniformData {
+ void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); // fills viewport_flip
+ alignas(16) GLvec4 viewport_flip; // mirrors `vec4 viewport_flip` in the GLSL config blocks
+};
+static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
+static_assert(sizeof(MaxwellUniformData) < 16384,
+ "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
+
+/// Owns the GL program built from one generated shader, together with its ShaderEntries.
+class OGLShaderStage {
+public:
+    OGLShaderStage() = default;
+
+    /// Compiles the source in `program_result` as a shader of `type`, creates `program` from
+    /// it, sets up its uniform block and sampler bindings, and stores the shader entries.
+    void Create(const ProgramResult& program_result, GLenum type) {
+        const auto& [source, shader_entries] = program_result;
+
+        OGLShader stage_shader;
+        stage_shader.Create(source.c_str(), type);
+        program.Create(true, stage_shader.handle);
+        Impl::SetShaderUniformBlockBindings(program.handle);
+        Impl::SetShaderSamplerBindings(program.handle);
+        entries = shader_entries;
+    }
+
+    /// GL handle of the program created by Create().
+    GLuint GetHandle() const {
+        return program.handle;
+    }
+
+    /// Copy of the entries stored by Create().
+    ShaderEntries GetEntries() const {
+        return entries;
+    }
+
+private:
+    OGLProgram program;
+    ShaderEntries entries;
+};
+
+// TODO(wwylele): beautify this doc
+// Two-level shader cache for translating a guest shader (keyed by KeyConfigType) to GLSL.
+// The double cache is needed because different KeyConfigType values, which include a hash of
+// the code region (including its leftover unused code), can generate the same GLSL code.
+template <typename KeyConfigType,
+ ProgramResult (*CodeGenerator)(const ShaderSetup&, const KeyConfigType&),
+ GLenum ShaderType>
+class ShaderCache {
+public:
+ ShaderCache() = default;
+
+ using Result = std::pair<GLuint, ShaderEntries>; // program handle and its shader entries
+
+ Result Get(const KeyConfigType& key, const ShaderSetup& setup) {
+ auto map_it = shader_map.find(key);
+ if (map_it == shader_map.end()) { // unknown key: generate GLSL, then look it up by source
+ ProgramResult program = CodeGenerator(setup, key);
+
+ auto [iter, new_shader] = shader_cache.emplace(program.first, OGLShaderStage{});
+ OGLShaderStage& cached_shader = iter->second;
+ if (new_shader) { // only compile when this exact source was not seen before
+ cached_shader.Create(program, ShaderType);
+ }
+ shader_map[key] = &cached_shader; // remember the key so later lookups skip generation
+ return {cached_shader.GetHandle(), program.second};
+ } else {
+ return {map_it->second->GetHandle(), map_it->second->GetEntries()};
+ }
+ }
+
+private:
+ std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map; // key -> entry in shader_cache
+ std::unordered_map<std::string, OGLShaderStage> shader_cache; // GLSL source -> compiled stage
+};
+
+using VertexShaders = ShaderCache<MaxwellVSConfig, &GenerateVertexShader, GL_VERTEX_SHADER>;
+
+using FragmentShaders = ShaderCache<MaxwellFSConfig, &GenerateFragmentShader, GL_FRAGMENT_SHADER>;
+
+/// Tracks the currently selected vertex/geometry/fragment stage programs and applies them to
+/// a single program pipeline object.
+class ProgramManager {
+public:
+    ProgramManager() {
+        pipeline.Create();
+    }
+
+    /// Selects the vertex shader for `config` (generating it on a cache miss) and returns its
+    /// shader entries. `setup` is taken by reference to avoid copying the whole ProgramCode
+    /// array on every call.
+    ShaderEntries UseProgrammableVertexShader(const MaxwellVSConfig& config,
+                                              const ShaderSetup& setup) {
+        ShaderEntries result;
+        std::tie(current.vs, result) = vertex_shaders.Get(config, setup);
+        return result;
+    }
+
+    /// Selects the fragment shader for `config` (generating it on a cache miss) and returns its
+    /// shader entries. `setup` is taken by reference for the same reason as above.
+    ShaderEntries UseProgrammableFragmentShader(const MaxwellFSConfig& config,
+                                                const ShaderSetup& setup) {
+        ShaderEntries result;
+        std::tie(current.fs, result) = fragment_shaders.Get(config, setup);
+        return result;
+    }
+
+    /// Returns the GL program handle currently selected for `stage`. Only the vertex and
+    /// fragment stages are handled.
+    GLuint GetCurrentProgramStage(Maxwell3D::Regs::ShaderStage stage) {
+        switch (stage) {
+        case Maxwell3D::Regs::ShaderStage::Vertex:
+            return current.vs;
+        case Maxwell3D::Regs::ShaderStage::Fragment:
+            return current.fs;
+        }
+
+        UNREACHABLE();
+        // Never flow off the end of a non-void function, even if UNREACHABLE() returns.
+        return 0;
+    }
+
+    /// Selects "no geometry shader" (handle 0) for the geometry stage.
+    void UseTrivialGeometryShader() {
+        current.gs = 0;
+    }
+
+    /// Attaches the currently selected stages to the pipeline and makes `state` use that
+    /// pipeline instead of a monolithic shader program.
+    void ApplyTo(OpenGLState& state) {
+        // Workaround for AMD bug
+        glUseProgramStages(pipeline.handle,
+                           GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT,
+                           0);
+
+        glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current.vs);
+        glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current.gs);
+        glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current.fs);
+        state.draw.shader_program = 0;
+        state.draw.program_pipeline = pipeline.handle;
+    }
+
+private:
+    /// Handles of the selected stage programs; 0 means no program for that stage.
+    struct ShaderTuple {
+        GLuint vs = 0, gs = 0, fs = 0;
+        bool operator==(const ShaderTuple& rhs) const {
+            return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs);
+        }
+        struct Hash {
+            std::size_t operator()(const ShaderTuple& tuple) const {
+                std::size_t hash = 0;
+                boost::hash_combine(hash, tuple.vs);
+                boost::hash_combine(hash, tuple.gs);
+                boost::hash_combine(hash, tuple.fs);
+                return hash;
+            }
+        };
+    };
+    ShaderTuple current;
+    VertexShaders vertex_shaders;
+    FragmentShaders fragment_shaders;
+
+    std::unordered_map<ShaderTuple, OGLProgram, ShaderTuple::Hash> program_cache;
+    OGLPipeline pipeline;
+};
+
+} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index a6c6204d5..8568fface 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -10,156 +10,41 @@
namespace GLShader {
-GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader,
- const char* fragment_shader, const std::vector<const char*>& feedback_vars,
- bool separable_program) {
- // Create the shaders
- GLuint vertex_shader_id = vertex_shader ? glCreateShader(GL_VERTEX_SHADER) : 0;
- GLuint geometry_shader_id = geometry_shader ? glCreateShader(GL_GEOMETRY_SHADER) : 0;
- GLuint fragment_shader_id = fragment_shader ? glCreateShader(GL_FRAGMENT_SHADER) : 0;
+/// Creates and compiles a single GLSL shader object and logs the driver's info log.
+/// The shader handle is returned even when compilation fails; the failure is only logged.
+GLuint LoadShader(const char* source, GLenum type) {
+ // Start from a printable fallback so the log calls below never read an uninitialized pointer
+ // if UNREACHABLE() returns for an unexpected shader type.
+ const char* debug_type = "unknown";
+ switch (type) {
+ case GL_VERTEX_SHADER:
+ debug_type = "vertex";
+ break;
+ case GL_GEOMETRY_SHADER:
+ debug_type = "geometry";
+ break;
+ case GL_FRAGMENT_SHADER:
+ debug_type = "fragment";
+ break;
+ default:
+ UNREACHABLE();
+ }
+ GLuint shader_id = glCreateShader(type);
+ glShaderSource(shader_id, 1, &source, nullptr);
+ NGLOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
+ glCompileShader(shader_id);
GLint result = GL_FALSE;
- int info_log_length;
-
- if (vertex_shader) {
- // Compile Vertex Shader
- LOG_DEBUG(Render_OpenGL, "Compiling vertex shader...");
-
- glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr);
- glCompileShader(vertex_shader_id);
-
- // Check Vertex Shader
- glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result);
- glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
-
- if (info_log_length > 1) {
- std::vector<char> vertex_shader_error(info_log_length);
- glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]);
- if (result == GL_TRUE) {
- LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]);
- } else {
- LOG_CRITICAL(Render_OpenGL, "Error compiling vertex shader:\n%s",
- &vertex_shader_error[0]);
- }
- }
- }
-
- if (geometry_shader) {
- // Compile Geometry Shader
- LOG_DEBUG(Render_OpenGL, "Compiling geometry shader...");
-
- glShaderSource(geometry_shader_id, 1, &geometry_shader, nullptr);
- glCompileShader(geometry_shader_id);
-
- // Check Geometry Shader
- glGetShaderiv(geometry_shader_id, GL_COMPILE_STATUS, &result);
- glGetShaderiv(geometry_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
-
- if (info_log_length > 1) {
- std::vector<char> geometry_shader_error(info_log_length);
- glGetShaderInfoLog(geometry_shader_id, info_log_length, nullptr,
- &geometry_shader_error[0]);
- if (result == GL_TRUE) {
- LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]);
- } else {
- LOG_CRITICAL(Render_OpenGL, "Error compiling geometry shader:\n%s",
- &geometry_shader_error[0]);
- }
- }
- }
-
- if (fragment_shader) {
- // Compile Fragment Shader
- LOG_DEBUG(Render_OpenGL, "Compiling fragment shader...");
-
- glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr);
- glCompileShader(fragment_shader_id);
-
- // Check Fragment Shader
- glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result);
- glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
-
- if (info_log_length > 1) {
- std::vector<char> fragment_shader_error(info_log_length);
- glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr,
- &fragment_shader_error[0]);
- if (result == GL_TRUE) {
- LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]);
- } else {
- LOG_CRITICAL(Render_OpenGL, "Error compiling fragment shader:\n%s",
- &fragment_shader_error[0]);
- }
- }
- }
-
- // Link the program
- LOG_DEBUG(Render_OpenGL, "Linking program...");
-
- GLuint program_id = glCreateProgram();
- if (vertex_shader) {
- glAttachShader(program_id, vertex_shader_id);
- }
- if (geometry_shader) {
- glAttachShader(program_id, geometry_shader_id);
- }
- if (fragment_shader) {
- glAttachShader(program_id, fragment_shader_id);
- }
-
- if (!feedback_vars.empty()) {
- auto varyings = feedback_vars;
- glTransformFeedbackVaryings(program_id, static_cast<GLsizei>(feedback_vars.size()),
- &varyings[0], GL_INTERLEAVED_ATTRIBS);
- }
-
- if (separable_program) {
- glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
- }
-
- glLinkProgram(program_id);
-
- // Check the program
- glGetProgramiv(program_id, GL_LINK_STATUS, &result);
- glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
+ // Pre-zeroed so a failing query cannot leave an indeterminate length behind.
+ GLint info_log_length = 0;
+ glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result);
+ glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
if (info_log_length > 1) {
- std::vector<char> program_error(info_log_length);
- glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
+ // GL_INFO_LOG_LENGTH counts the terminating NUL; trim the buffer to the characters that
+ // were actually written so the terminator is not logged as part of the message.
+ std::string shader_error(info_log_length, ' ');
+ GLsizei written = 0;
+ glGetShaderInfoLog(shader_id, info_log_length, &written, &shader_error[0]);
+ shader_error.resize(written);
if (result == GL_TRUE) {
- LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]);
+ NGLOG_DEBUG(Render_OpenGL, "{}", shader_error);
} else {
- LOG_CRITICAL(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]);
+ NGLOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error);
}
}
-
- // If the program linking failed at least one of the shaders was probably bad
- if (result == GL_FALSE) {
- if (vertex_shader) {
- LOG_CRITICAL(Render_OpenGL, "Vertex shader:\n%s", vertex_shader);
- }
- if (geometry_shader) {
- LOG_CRITICAL(Render_OpenGL, "Geometry shader:\n%s", geometry_shader);
- }
- if (fragment_shader) {
- LOG_CRITICAL(Render_OpenGL, "Fragment shader:\n%s", fragment_shader);
- }
- }
- ASSERT_MSG(result == GL_TRUE, "Shader not linked");
-
- if (vertex_shader) {
- glDetachShader(program_id, vertex_shader_id);
- glDeleteShader(vertex_shader_id);
- }
- if (geometry_shader) {
- glDetachShader(program_id, geometry_shader_id);
- glDeleteShader(geometry_shader_id);
- }
- if (fragment_shader) {
- glDetachShader(program_id, fragment_shader_id);
- glDeleteShader(fragment_shader_id);
- }
-
- return program_id;
+ return shader_id;
}
} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index fc7b5e080..a1fa9e814 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -6,18 +6,60 @@
#include <vector>
#include <glad/glad.h>
+#include "common/assert.h"
+#include "common/logging/log.h"
namespace GLShader {
/**
+ * Utility function to create and compile an OpenGL GLSL shader
+ * @param source String of the GLSL shader program
+ * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
+ */
+GLuint LoadShader(const char* source, GLenum type);
+
+/**
* Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
- * @param vertex_shader String of the GLSL vertex shader program
- * @param geometry_shader String of the GLSL geometry shader program
- * @param fragment_shader String of the GLSL fragment shader program
- * @returns Handle of the newly created OpenGL shader object
+ * @param separable_program whether to create a separable program
+ * @param shaders ID of shaders to attach to the program
+ * @returns Handle of the newly created OpenGL program object
*/
-GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader,
- const char* fragment_shader, const std::vector<const char*>& feedback_vars = {},
- bool separable_program = false);
+template <typename... T>
+GLuint LoadProgram(bool separable_program, T... shaders) {
+ // Link the program
+ NGLOG_DEBUG(Render_OpenGL, "Linking program...");
+
+ GLuint program_id = glCreateProgram();
+
+ // Attach every shader handle that is not 0; zero handles are skipped.
+ ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...);
+
+ if (separable_program) {
+ glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
+ }
+
+ glLinkProgram(program_id);
+
+ // Check the program
+ GLint result = GL_FALSE;
+ // Pre-zeroed so a failing query cannot leave an indeterminate length behind.
+ GLint info_log_length = 0;
+ glGetProgramiv(program_id, GL_LINK_STATUS, &result);
+ glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
+
+ if (info_log_length > 1) {
+ // GL_INFO_LOG_LENGTH counts the terminating NUL; trim the buffer to the characters that
+ // were actually written so the terminator is not logged as part of the message.
+ std::string program_error(info_log_length, ' ');
+ GLsizei written = 0;
+ glGetProgramInfoLog(program_id, info_log_length, &written, &program_error[0]);
+ program_error.resize(written);
+ if (result == GL_TRUE) {
+ NGLOG_DEBUG(Render_OpenGL, "{}", program_error);
+ } else {
+ NGLOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error);
+ }
+ }
+
+ ASSERT_MSG(result == GL_TRUE, "Shader not linked");
+
+ // The shaders are only detached here, not deleted; the handles stay with the caller.
+ ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...);
+
+ return program_id;
+}
} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 6da3a7781..f91dfe36a 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -2,8 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <iterator>
#include <glad/glad.h>
-#include "common/common_funcs.h"
#include "common/logging/log.h"
#include "video_core/renderer_opengl/gl_state.h"
@@ -192,7 +192,7 @@ void OpenGLState::Apply() const {
}
// Textures
- for (unsigned i = 0; i < ARRAY_SIZE(texture_units); ++i) {
+ for (size_t i = 0; i < std::size(texture_units); ++i) {
if (texture_units[i].texture_2d != cur_state.texture_units[i].texture_2d) {
glActiveTexture(TextureUnits::MaxwellTexture(i).Enum());
glBindTexture(GL_TEXTURE_2D, texture_units[i].texture_2d);
@@ -202,6 +202,20 @@ void OpenGLState::Apply() const {
}
}
+ // Constbuffers
+ for (u32 stage = 0; stage < draw.const_buffers.size(); ++stage) {
+ for (u32 buffer_id = 0; buffer_id < draw.const_buffers[stage].size(); ++buffer_id) {
+ auto& current = cur_state.draw.const_buffers[stage][buffer_id];
+ auto& new_state = draw.const_buffers[stage][buffer_id];
+ if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint ||
+ current.ssbo != new_state.ssbo) {
+ if (new_state.enabled) {
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, new_state.bindpoint, new_state.ssbo);
+ }
+ }
+ }
+ }
+
// Lighting LUTs
if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) {
glActiveTexture(TextureUnits::LightingLUT.Enum());
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index b18af14bb..75c08e645 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -123,6 +123,12 @@ public:
GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
GLuint shader_program; // GL_CURRENT_PROGRAM
GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
+ /// Binding of one const buffer slot. When a slot differs from the cached state and is
+ /// enabled, Apply() binds `ssbo` to GL_SHADER_STORAGE_BUFFER index `bindpoint`.
+ struct ConstBufferConfig {
+ bool enabled = false;
+ // Initialized like `enabled` so a default-constructed config never exposes indeterminate
+ // values to the comparisons in Apply().
+ GLuint bindpoint = 0;
+ GLuint ssbo = 0;
+ };
+ std::array<std::array<ConstBufferConfig, 16>, 5> const_buffers{};
} draw;
struct {
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index 4bc2f52e0..e78dc5784 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#pragma once
+
#include <memory>
#include <glad/glad.h>
#include "common/common_types.h"
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 48ee80125..a49265b38 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -10,6 +10,14 @@
#include "common/logging/log.h"
#include "video_core/engines/maxwell_3d.h"
+// Fixed-size float vectors with 2, 3 and 4 GLfloat components.
+using GLvec2 = std::array<GLfloat, 2>;
+using GLvec3 = std::array<GLfloat, 3>;
+using GLvec4 = std::array<GLfloat, 4>;
+
+// Fixed-size unsigned vectors with 2, 3 and 4 GLuint components.
+using GLuvec2 = std::array<GLuint, 2>;
+using GLuvec3 = std::array<GLuint, 3>;
+using GLuvec4 = std::array<GLuint, 4>;
+
namespace MaxwellToGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -23,7 +31,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return GL_UNSIGNED_BYTE;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size=%s", attrib.SizeString().c_str());
+ NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
UNREACHABLE();
return {};
}
@@ -32,17 +40,33 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return GL_FLOAT;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type=%s", attrib.TypeString().c_str());
+ NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
+ UNREACHABLE();
+ return {};
+}
+
+/// Translates a Maxwell index format into the GL index type enum of the same width. Any other
+/// value is logged as unimplemented and reaches UNREACHABLE().
+inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
+ switch (index_format) {
+ case Maxwell::IndexFormat::UnsignedByte:
+ return GL_UNSIGNED_BYTE;
+ case Maxwell::IndexFormat::UnsignedShort:
+ return GL_UNSIGNED_SHORT;
+ case Maxwell::IndexFormat::UnsignedInt:
+ return GL_UNSIGNED_INT;
+ }
+ NGLOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format));
UNREACHABLE();
return {};
}
+/// Translates a Maxwell primitive topology into the GL primitive mode. Only Triangles and
+/// TriangleStrip are mapped; any other value is logged and reaches UNREACHABLE().
inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
switch (topology) {
+ case Maxwell::PrimitiveTopology::Triangles:
+ return GL_TRIANGLES;
case Maxwell::PrimitiveTopology::TriangleStrip:
return GL_TRIANGLE_STRIP;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented primitive topology=%d", topology);
+ NGLOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
UNREACHABLE();
return {};
}
@@ -54,18 +78,90 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode) {
case Tegra::Texture::TextureFilter::Nearest:
return GL_NEAREST;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode=%u",
- static_cast<u32>(filter_mode));
+ NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode={}",
+ static_cast<u32>(filter_mode));
UNREACHABLE();
return {};
}
+/// Translates a Tegra texture wrap mode into the GL wrap parameter. ClampOGL has no exact
+/// counterpart and is mapped to GL_CLAMP_TO_BORDER for now (see the TODO in the switch). Any
+/// other value is logged and reaches UNREACHABLE().
inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
switch (wrap_mode) {
+ case Tegra::Texture::WrapMode::Wrap:
+ return GL_REPEAT;
case Tegra::Texture::WrapMode::ClampToEdge:
return GL_CLAMP_TO_EDGE;
+ case Tegra::Texture::WrapMode::ClampOGL:
+ // TODO(Subv): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
+ // GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to
+ // manually mix them. However the shader part of this is not yet implemented.
+ return GL_CLAMP_TO_BORDER;
+ }
+ NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}",
+ static_cast<u32>(wrap_mode));
+ UNREACHABLE();
+ return {};
+}
+
+/// Translates a Maxwell blend equation into the corresponding GL blend equation enum. A value
+/// outside the five handled equations is logged and reaches UNREACHABLE().
+inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
+ using Equation = Maxwell::Blend::Equation;
+
+ switch (equation) {
+ case Equation::Add:
+ return GL_FUNC_ADD;
+ case Equation::Subtract:
+ return GL_FUNC_SUBTRACT;
+ case Equation::ReverseSubtract:
+ return GL_FUNC_REVERSE_SUBTRACT;
+ case Equation::Min:
+ return GL_MIN;
+ case Equation::Max:
+ return GL_MAX;
+ }
+
+ // No case matched: report the raw register value before bailing out.
+ const u32 raw_equation = static_cast<u32>(equation);
+ NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend equation={}", raw_equation);
+ UNREACHABLE();
+ return {};
+}
+
+/// Translates a Maxwell blend factor into the GL blend factor enum of the same meaning. A value
+/// not listed in the switch is logged and reaches UNREACHABLE().
+inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
+ switch (factor) {
+ case Maxwell::Blend::Factor::Zero:
+ return GL_ZERO;
+ case Maxwell::Blend::Factor::One:
+ return GL_ONE;
+ case Maxwell::Blend::Factor::SourceColor:
+ return GL_SRC_COLOR;
+ case Maxwell::Blend::Factor::OneMinusSourceColor:
+ return GL_ONE_MINUS_SRC_COLOR;
+ case Maxwell::Blend::Factor::SourceAlpha:
+ return GL_SRC_ALPHA;
+ case Maxwell::Blend::Factor::OneMinusSourceAlpha:
+ return GL_ONE_MINUS_SRC_ALPHA;
+ case Maxwell::Blend::Factor::DestAlpha:
+ return GL_DST_ALPHA;
+ case Maxwell::Blend::Factor::OneMinusDestAlpha:
+ return GL_ONE_MINUS_DST_ALPHA;
+ case Maxwell::Blend::Factor::DestColor:
+ return GL_DST_COLOR;
+ case Maxwell::Blend::Factor::OneMinusDestColor:
+ return GL_ONE_MINUS_DST_COLOR;
+ case Maxwell::Blend::Factor::SourceAlphaSaturate:
+ return GL_SRC_ALPHA_SATURATE;
+ case Maxwell::Blend::Factor::Source1Color:
+ return GL_SRC1_COLOR;
+ case Maxwell::Blend::Factor::OneMinusSource1Color:
+ return GL_ONE_MINUS_SRC1_COLOR;
+ case Maxwell::Blend::Factor::Source1Alpha:
+ return GL_SRC1_ALPHA;
+ case Maxwell::Blend::Factor::OneMinusSource1Alpha:
+ return GL_ONE_MINUS_SRC1_ALPHA;
+ case Maxwell::Blend::Factor::ConstantColor:
+ return GL_CONSTANT_COLOR;
+ case Maxwell::Blend::Factor::OneMinusConstantColor:
+ return GL_ONE_MINUS_CONSTANT_COLOR;
+ case Maxwell::Blend::Factor::ConstantAlpha:
+ return GL_CONSTANT_ALPHA;
+ case Maxwell::Blend::Factor::OneMinusConstantAlpha:
+ return GL_ONE_MINUS_CONSTANT_ALPHA;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode=%u", static_cast<u32>(wrap_mode));
+ NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
UNREACHABLE();
return {};
}
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 78b50b227..ab0acb20a 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -9,13 +9,10 @@
#include <memory>
#include <glad/glad.h>
#include "common/assert.h"
-#include "common/bit_field.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
-#include "core/hw/hw.h"
-#include "core/hw/lcd.h"
#include "core/memory.h"
#include "core/settings.h"
#include "core/tracer/recorder.h"
@@ -57,7 +54,7 @@ uniform sampler2D color_texture;
void main() {
// Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to
// support more framebuffer pixel formats.
- color = texture(color_texture, frag_tex_coord).abgr;
+ color = texture(color_texture, frag_tex_coord);
}
)";
@@ -210,7 +207,7 @@ void RendererOpenGL::InitOpenGLObjects() {
0.0f);
// Link shaders and get variable locations
- shader.Create(vertex_shader, nullptr, fragment_shader);
+ shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
state.draw.shader_program = shader.handle;
state.Apply();
uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
@@ -311,10 +308,10 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
}
std::array<ScreenRectVertex, 4> vertices = {{
- ScreenRectVertex(x, y, texcoords.top, right),
- ScreenRectVertex(x + w, y, texcoords.bottom, right),
- ScreenRectVertex(x, y + h, texcoords.top, left),
- ScreenRectVertex(x + w, y + h, texcoords.bottom, left),
+ ScreenRectVertex(x, y, texcoords.top, left),
+ ScreenRectVertex(x + w, y, texcoords.bottom, left),
+ ScreenRectVertex(x, y + h, texcoords.top, right),
+ ScreenRectVertex(x + w, y + h, texcoords.bottom, right),
}};
state.texture_units[0].texture_2d = screen_info.display_texture;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index c52f40037..2cc6d9a00 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -72,7 +72,7 @@ private:
// OpenGL object IDs
OGLVertexArray vertex_array;
OGLBuffer vertex_buffer;
- OGLShader shader;
+ OGLProgram shader;
/// Display information for Switch screen
ScreenInfo screen_info;
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 2e87281eb..4df687786 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -48,31 +48,39 @@ u32 BytesPerPixel(TextureFormat format) {
case TextureFormat::DXT1:
// In this case a 'pixel' actually refers to a 4x4 tile.
return 8;
+ case TextureFormat::DXT23:
+ case TextureFormat::DXT45:
+ // In this case a 'pixel' actually refers to a 4x4 tile.
+ return 16;
case TextureFormat::A8R8G8B8:
return 4;
+ case TextureFormat::B5G6R5:
+ return 2;
default:
UNIMPLEMENTED_MSG("Format not implemented");
break;
}
}
-std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height) {
+std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
+ u32 block_height) {
u8* data = Memory::GetPointer(address);
u32 bytes_per_pixel = BytesPerPixel(format);
- static constexpr u32 DefaultBlockHeight = 16;
-
std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
switch (format) {
case TextureFormat::DXT1:
- // In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values.
+ case TextureFormat::DXT23:
+ case TextureFormat::DXT45:
+ // In the DXT formats, each 4x4 tile is swizzled instead of just individual pixel values.
CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
- unswizzled_data.data(), true, DefaultBlockHeight);
+ unswizzled_data.data(), true, block_height);
break;
case TextureFormat::A8R8G8B8:
+ case TextureFormat::B5G6R5:
CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
- unswizzled_data.data(), true, DefaultBlockHeight);
+ unswizzled_data.data(), true, block_height);
break;
default:
UNIMPLEMENTED_MSG("Format not implemented");
@@ -89,7 +97,10 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
// TODO(Subv): Implement.
switch (format) {
case TextureFormat::DXT1:
+ case TextureFormat::DXT23:
+ case TextureFormat::DXT45:
case TextureFormat::A8R8G8B8:
+ case TextureFormat::B5G6R5:
// TODO(Subv): For the time being just forward the same data without any decoding.
rgba_data = texture_data;
break;
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 0c21694ff..a700911cf 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -14,7 +14,8 @@ namespace Texture {
/**
* Unswizzles a swizzled texture without changing its format.
*/
-std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height);
+std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
+ u32 block_height = TICEntry::DefaultBlockHeight);
/**
* Decodes an unswizzled texture into a A8R8G8B8 texture.
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index c12ed6e1d..86e45aa88 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -4,6 +4,7 @@
#pragma once
+#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -13,8 +14,11 @@ namespace Tegra {
namespace Texture {
+/// Texture formats known so far. TICEntry stores one of these values in its 7-bit `format`
+/// field.
enum class TextureFormat : u32 {
- A8R8G8B8 = 8,
+ A8R8G8B8 = 0x8,
+ B5G6R5 = 0x15,
DXT1 = 0x24,
+ DXT23 = 0x25,
+ DXT45 = 0x26,
};
enum class TextureType : u32 {
@@ -55,6 +59,8 @@ union TextureHandle {
static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
struct TICEntry {
+ static constexpr u32 DefaultBlockHeight = 16;
+
union {
u32 raw;
BitField<0, 7, TextureFormat> format;
@@ -68,7 +74,12 @@ struct TICEntry {
BitField<0, 16, u32> address_high;
BitField<21, 3, TICHeaderVersion> header_version;
};
- INSERT_PADDING_BYTES(4);
+ union {
+ BitField<3, 3, u32> block_height;
+
+ // High 16 bits of the pitch value
+ BitField<0, 16, u32> pitch_high;
+ };
union {
BitField<0, 16, u32> width_minus_1;
BitField<23, 4, TextureType> texture_type;
@@ -80,6 +91,13 @@ struct TICEntry {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
}
+ /// Returns the texture pitch rebuilt from the pitch_high field. Asserts that the header
+ /// version is Pitch or PitchColorKey.
+ u32 Pitch() const {
+ ASSERT(header_version == TICHeaderVersion::Pitch ||
+ header_version == TICHeaderVersion::PitchColorKey);
+ // The pitch is a 21-bit, 32-byte-aligned value of which only the upper 16 bits are stored,
+ // so scale the field back up by 32.
+ const u32 stored_bits = pitch_high;
+ return stored_bits * 32;
+ }
+
u32 Width() const {
return width_minus_1 + 1;
}
@@ -88,6 +106,13 @@ struct TICEntry {
return height_minus_1 + 1;
}
+ /// Returns the block height decoded from the block_height field. Asserts that the header
+ /// version is BlockLinear or BlockLinearColorKey.
+ u32 BlockHeight() const {
+ ASSERT(header_version == TICHeaderVersion::BlockLinear ||
+ header_version == TICHeaderVersion::BlockLinearColorKey);
+ // The field holds log2 of the block height, so the height is 2 raised to that value.
+ const u32 log2_height = block_height;
+ return 1u << log2_height;
+ }
+
bool IsTiled() const {
return header_version == TICHeaderVersion::BlockLinear ||
header_version == TICHeaderVersion::BlockLinearColorKey;
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index be0f7e22b..e0a14d48f 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -151,7 +151,7 @@ static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixe
const u32 coarse_y = y & ~127;
u32 morton_offset =
GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
- u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
+ u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel;
data_ptrs[morton_to_gl] = morton_data + morton_offset;
data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];