summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/engines
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 94
-rw-r--r-- src/video_core/engines/maxwell_3d.h 95
-rw-r--r-- src/video_core/engines/shader_bytecode.h 47
-rw-r--r-- src/video_core/engines/shader_header.h 5
4 files changed, 209 insertions, 32 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index bca014a4a..6de07ea56 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <cinttypes>
+#include <cstring>
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
@@ -19,21 +20,69 @@ namespace Tegra::Engines {
constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
- : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {}
+ : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {
+ InitializeRegisterDefaults(); // populate regs with boot-time defaults at construction
+}
+
+void Maxwell3D::InitializeRegisterDefaults() {
+ // Seeds the register file with the values games expect to find at boot. This covers registers
+ // that some games never set explicitly and instead rely on being at their defaults.
+
+ // Zero the whole register file first; selected defaults are then written on top of that.
+ std::memset(&regs, 0, sizeof(regs));
+
+ // Depth range near/far is not always set by games, but is expected to be the default
+ // 0.0f, 1.0f. This is needed for ARMS.
+ for (std::size_t viewport{}; viewport < Regs::NumViewports; ++viewport) {
+ regs.viewport[viewport].depth_range_near = 0.0f;
+ regs.viewport[viewport].depth_range_far = 1.0f;
+ }
+ // Doom and Bomberman seem to leave the blend registers uninitialized and just enable blending,
+ // so initialize the blend registers with sane values (Add equation, source One, dest Zero).
+ regs.blend.equation_rgb = Regs::Blend::Equation::Add;
+ regs.blend.factor_source_rgb = Regs::Blend::Factor::One;
+ regs.blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
+ regs.blend.equation_a = Regs::Blend::Equation::Add;
+ regs.blend.factor_source_a = Regs::Blend::Factor::One;
+ regs.blend.factor_dest_a = Regs::Blend::Factor::Zero;
+ for (std::size_t blend_index = 0; blend_index < Regs::NumRenderTargets; blend_index++) { // same defaults per render target
+ regs.independent_blend[blend_index].equation_rgb = Regs::Blend::Equation::Add;
+ regs.independent_blend[blend_index].factor_source_rgb = Regs::Blend::Factor::One;
+ regs.independent_blend[blend_index].factor_dest_rgb = Regs::Blend::Factor::Zero;
+ regs.independent_blend[blend_index].equation_a = Regs::Blend::Equation::Add;
+ regs.independent_blend[blend_index].factor_source_a = Regs::Blend::Factor::One;
+ regs.independent_blend[blend_index].factor_dest_a = Regs::Blend::Factor::Zero;
+ }
+ regs.stencil_front_op_fail = Regs::StencilOp::Keep; // front-face stencil: every op is Keep
+ regs.stencil_front_op_zfail = Regs::StencilOp::Keep;
+ regs.stencil_front_op_zpass = Regs::StencilOp::Keep;
+ regs.stencil_front_func_func = Regs::ComparisonOp::Always;
+ regs.stencil_front_func_mask = 0xFFFFFFFF; // all mask bits set
+ regs.stencil_front_mask = 0xFFFFFFFF;
+ regs.stencil_two_side_enable = 1; // two-sided stencil is enabled by default
+ regs.stencil_back_op_fail = Regs::StencilOp::Keep; // back-face stencil mirrors the front-face defaults
+ regs.stencil_back_op_zfail = Regs::StencilOp::Keep;
+ regs.stencil_back_op_zpass = Regs::StencilOp::Keep;
+ regs.stencil_back_func_func = Regs::ComparisonOp::Always;
+ regs.stencil_back_func_mask = 0xFFFFFFFF;
+ regs.stencil_back_mask = 0xFFFFFFFF;
+}
void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
// Reset the current macro.
executing_macro = 0;
- // The requested macro must have been uploaded already.
- auto macro_code = uploaded_macros.find(method);
- if (macro_code == uploaded_macros.end()) {
- LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method);
+ // Look up the start offset bound to this macro entry
+ const u32 entry{(method - MacroRegistersStart) >> 1}; // two method slots per macro entry
+ const auto& search{macro_offsets.find(entry)}; // macro_offsets is filled by ProcessMacroBind
+ if (search == macro_offsets.end()) {
+ LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method);
+ UNREACHABLE(); // calling an entry that was never bound is treated as a fatal error
return;
}
// Execute the current macro.
- macro_interpreter.Execute(macro_code->second, std::move(parameters));
+ macro_interpreter.Execute(search->second, std::move(parameters)); // pass the bound offset, not the code itself
}
void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
@@ -72,13 +121,23 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
}
- regs.reg_array[method] = value;
+ if (regs.reg_array[method] != value) {
+ regs.reg_array[method] = value;
+ if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
+ method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
+ dirty_flags.vertex_attrib_format = true;
+ }
+ }
switch (method) {
case MAXWELL3D_REG_INDEX(macros.data): {
ProcessMacroUpload(value);
break;
}
+ case MAXWELL3D_REG_INDEX(macros.bind): {
+ ProcessMacroBind(value);
+ break;
+ }
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
@@ -140,22 +199,25 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
}
void Maxwell3D::ProcessMacroUpload(u32 data) {
- // Store the uploaded macro code to interpret them when they're called.
- auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart];
- macro.push_back(data);
+ ASSERT_MSG(regs.macros.upload_address < macro_memory.size(),
+ "upload_address exceeded macro_memory size!"); // upload_address is an index into macro_memory
+ macro_memory[regs.macros.upload_address++] = data; // store the word, then advance the upload_address register
+}
+
+void Maxwell3D::ProcessMacroBind(u32 data) {
+ macro_offsets[regs.macros.entry] = data; // record the value written to macros.bind as the start offset for the current macros.entry
}
void Maxwell3D::ProcessQueryGet() {
GPUVAddr sequence_address = regs.query.QueryAddress();
// Since the sequence address is given as a GPU VAddr, we have to convert it to an application
// VAddr before writing.
- boost::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);
+ std::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);
// TODO(Subv): Support the other query units.
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
"Units other than CROP are unimplemented");
- u32 value = Memory::Read32(*address);
u64 result = 0;
// TODO(Subv): Support the other query variables
@@ -268,7 +330,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
// Don't allow writing past the end of the buffer.
ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
- boost::optional<VAddr> address =
+ std::optional<VAddr> address =
memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
Memory::Write32(*address, value);
@@ -281,7 +343,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
GPUVAddr tic_base_address = regs.tic.TICAddress();
GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
- boost::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
+ std::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
Texture::TICEntry tic_entry;
Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
@@ -305,7 +367,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
- boost::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
+ std::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
Texture::TSCEntry tsc_entry;
Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
@@ -369,7 +431,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
- boost::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
+ std::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
Texture::FullTextureInfo tex_info{};
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 0e09a7ee5..91ca57883 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -345,6 +345,14 @@ public:
Invert = 6,
IncrWrap = 7,
DecrWrap = 8,
+ KeepOGL = 0x1E00,
+ ZeroOGL = 0,
+ ReplaceOGL = 0x1E01,
+ IncrOGL = 0x1E02,
+ DecrOGL = 0x1E03,
+ InvertOGL = 0x150A,
+ IncrWrapOGL = 0x8507,
+ DecrWrapOGL = 0x8508,
};
enum class MemoryLayout : u32 {
@@ -462,6 +470,16 @@ public:
}
};
+ struct ColorMask { // per-channel color mask register; one u32 viewed either whole or per channel
+ union {
+ u32 raw; // the full register word
+ BitField<0, 4, u32> R; // presumably BitField<bit offset, width, type> - TODO confirm against common/bit_field.h
+ BitField<4, 4, u32> G;
+ BitField<8, 4, u32> B;
+ BitField<12, 4, u32> A;
+ };
+ };
+
bool IsShaderConfigEnabled(std::size_t index) const {
// The VertexB is always enabled.
if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
@@ -475,12 +493,13 @@ public:
INSERT_PADDING_WORDS(0x45);
struct {
- INSERT_PADDING_WORDS(1);
+ u32 upload_address;
u32 data;
u32 entry;
+ u32 bind;
} macros;
- INSERT_PADDING_WORDS(0x189);
+ INSERT_PADDING_WORDS(0x188);
u32 tfb_enabled;
@@ -570,7 +589,11 @@ public:
u32 stencil_back_mask;
u32 stencil_back_func_mask;
- INSERT_PADDING_WORDS(0x13);
+ INSERT_PADDING_WORDS(0xC);
+
+ u32 color_mask_common;
+
+ INSERT_PADDING_WORDS(0x6);
u32 rt_separate_frag_data;
@@ -645,8 +668,14 @@ public:
ComparisonOp depth_test_func;
float alpha_test_ref;
ComparisonOp alpha_test_func;
-
- INSERT_PADDING_WORDS(0x9);
+ u32 draw_tfb_stride;
+ struct {
+ float r;
+ float g;
+ float b;
+ float a;
+ } blend_color;
+ INSERT_PADDING_WORDS(0x4);
struct {
u32 separate_alpha;
@@ -723,7 +752,11 @@ public:
StencilOp stencil_back_op_zpass;
ComparisonOp stencil_back_func_func;
- INSERT_PADDING_WORDS(0x17);
+ INSERT_PADDING_WORDS(0x4);
+
+ u32 framebuffer_srgb;
+
+ INSERT_PADDING_WORDS(0x12);
union {
BitField<2, 1, u32> coord_origin;
@@ -751,7 +784,14 @@ public:
};
} draw;
- INSERT_PADDING_WORDS(0x6B);
+ INSERT_PADDING_WORDS(0xA);
+
+ struct {
+ u32 enabled;
+ u32 index;
+ } primitive_restart;
+
+ INSERT_PADDING_WORDS(0x5F);
struct {
u32 start_addr_high;
@@ -829,8 +869,9 @@ public:
BitField<6, 4, u32> RT;
BitField<10, 11, u32> layer;
} clear_buffers;
-
- INSERT_PADDING_WORDS(0x4B);
+ INSERT_PADDING_WORDS(0xB);
+ std::array<ColorMask, NumRenderTargets> color_mask;
+ INSERT_PADDING_WORDS(0x38);
struct {
u32 query_address_high;
@@ -971,6 +1012,12 @@ public:
State state{};
MemoryManager& memory_manager;
+ struct DirtyFlags { // change-tracking flags for register groups
+ bool vertex_attrib_format = true; // starts set; WriteReg sets it again when a vertex_attrib_format register changes value
+ };
+
+ DirtyFlags dirty_flags;
+
/// Reads a register value located at the input method address
u32 GetRegisterValue(u32 method) const;
@@ -983,10 +1030,25 @@ public:
/// Returns the texture information for a specific texture in a specific shader stage.
Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
+ /// Storage for uploaded macro code. The real capacity is undetermined; 0x40000 words (1 MiB)
+ /// is much larger than anything we've seen used.
+ using MacroMemory = std::array<u32, 0x40000>;
+
+ /// Returns a read-only reference to the macro code memory.
+ const MacroMemory& GetMacroMemory() const {
+ return macro_memory;
+ }
+
private:
+ void InitializeRegisterDefaults();
+
VideoCore::RasterizerInterface& rasterizer;
- std::unordered_map<u32, std::vector<u32>> uploaded_macros;
+ /// Start offsets of each macro in macro_memory
+ std::unordered_map<u32, u32> macro_offsets;
+
+ /// Memory for macro code
+ MacroMemory macro_memory;
/// Macro method that is currently being executed / being fed parameters.
u32 executing_macro = 0;
@@ -1009,9 +1071,12 @@ private:
*/
void CallMacroMethod(u32 method, std::vector<u32> parameters);
- /// Handles writes to the macro uploading registers.
+ /// Handles writes to the macro uploading register.
void ProcessMacroUpload(u32 data);
+ /// Handles writes to the macro bind register.
+ void ProcessMacroBind(u32 data);
+
/// Handles a write to the CLEAR_BUFFERS register.
void ProcessClearBuffers();
@@ -1045,6 +1110,7 @@ ASSERT_REG_POSITION(scissor_test, 0x380);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
+ASSERT_REG_POSITION(color_mask_common, 0x3E4);
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@@ -1057,6 +1123,10 @@ ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
ASSERT_REG_POSITION(alpha_test_enabled, 0x4BB);
ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
ASSERT_REG_POSITION(depth_test_func, 0x4C3);
+ASSERT_REG_POSITION(alpha_test_ref, 0x4C4);
+ASSERT_REG_POSITION(alpha_test_func, 0x4C5);
+ASSERT_REG_POSITION(draw_tfb_stride, 0x4C6);
+ASSERT_REG_POSITION(blend_color, 0x4C7);
ASSERT_REG_POSITION(blend, 0x4CF);
ASSERT_REG_POSITION(stencil_enable, 0x4E0);
ASSERT_REG_POSITION(stencil_front_op_fail, 0x4E1);
@@ -1077,14 +1147,17 @@ ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
ASSERT_REG_POSITION(stencil_back_op_zfail, 0x567);
ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
+ASSERT_REG_POSITION(framebuffer_srgb, 0x56E);
ASSERT_REG_POSITION(point_coord_replace, 0x581);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
+ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(cull, 0x646);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);
+ASSERT_REG_POSITION(color_mask, 0x680);
ASSERT_REG_POSITION(query, 0x6C0);
ASSERT_REG_POSITION(vertex_array[0], 0x700);
ASSERT_REG_POSITION(independent_blend, 0x780);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 6cd08d28b..83a6fd875 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -5,12 +5,11 @@
#pragma once
#include <bitset>
+#include <optional>
#include <string>
#include <tuple>
#include <vector>
-#include <boost/optional.hpp>
-
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
@@ -79,6 +78,7 @@ union Attribute {
constexpr explicit Attribute(u64 value) : value(value) {}
enum class Index : u64 {
+ PointSize = 6,
Position = 7,
Attribute_0 = 8,
Attribute_31 = 39,
@@ -207,6 +207,16 @@ enum class UniformType : u64 {
Double = 5,
};
+enum class StoreType : u64 { // value of the 3-bit ldst_sl.type instruction field
+ Unsigned8 = 0,
+ Signed8 = 1,
+ Unsigned16 = 2,
+ Signed16 = 3,
+ Bytes32 = 4, // NOTE(review): alongside the 8/16 entries these look like bit widths (32/64/128) despite the name - confirm
+ Bytes64 = 5,
+ Bytes128 = 6,
+};
+
enum class IMinMaxExchange : u64 {
None = 0,
XLo = 1,
@@ -568,6 +578,10 @@ union Instruction {
} fmul32;
union {
+ BitField<52, 1, u64> generates_cc;
+ } op_32;
+
+ union {
BitField<48, 1, u64> is_signed;
} shift;
@@ -747,6 +761,18 @@ union Instruction {
} ld_c;
union {
+ BitField<48, 3, StoreType> type;
+ } ldst_sl;
+
+ union {
+ BitField<44, 2, u64> unknown;
+ } ld_l;
+
+ union {
+ BitField<44, 2, u64> unknown;
+ } st_l;
+
+ union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
BitField<7, 1, u64> abs_a;
@@ -1208,6 +1234,8 @@ union Instruction {
BitField<61, 1, u64> is_b_imm;
BitField<60, 1, u64> is_b_gpr;
BitField<59, 1, u64> is_c_gpr;
+ BitField<20, 24, s64> smem_imm;
+ BitField<0, 5, ControlCode> flow_control_code;
Attribute attribute;
Sampler sampler;
@@ -1231,8 +1259,12 @@ public:
BRA,
PBK,
LD_A,
+ LD_L,
+ LD_S,
LD_C,
ST_A,
+ ST_L,
+ ST_S,
LDG, // Load from global memory
STG, // Store in global memory
TEX,
@@ -1428,7 +1460,7 @@ public:
Type type;
};
- static boost::optional<const Matcher&> Decode(Instruction instr) {
+ static std::optional<std::reference_wrapper<const Matcher>> Decode(Instruction instr) {
static const auto table{GetDecodeTable()};
const auto matches_instruction = [instr](const auto& matcher) {
@@ -1436,7 +1468,8 @@ public:
};
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
- return iter != table.end() ? boost::optional<const Matcher&>(*iter) : boost::none;
+ return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter)
+ : std::nullopt;
}
private:
@@ -1489,8 +1522,12 @@ private:
INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
+ INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
+ INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
+ INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
+ INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
@@ -1626,4 +1663,4 @@ private:
}
};
-} // namespace Tegra::Shader
\ No newline at end of file
+} // namespace Tegra::Shader
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index a885ee3cf..a0e015c4b 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -96,6 +96,11 @@ struct Header {
}
} ps;
};
+
+ u64 GetLocalMemorySize() const { // read-only accessor: const so it is callable on a const Header
+ return (common1.shader_local_memory_low_size | // low part of the local memory size
+ (common2.shader_local_memory_high_size << 24)); // high part sits above bit 24 (presumably the low field is 24 bits wide - verify)
+ }
};
static_assert(sizeof(Header) == 0x50, "Incorrect structure size");