summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/maxwell_3d.h1
-rw-r--r--src/video_core/engines/shader_bytecode.h4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp55
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp10
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp2
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp11
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp31
-rw-r--r--src/video_core/shader/decode/register_set_predicate.cpp52
-rw-r--r--src/video_core/shader/node.h2
-rw-r--r--src/video_core/shader/track.cpp20
-rw-r--r--src/video_core/texture_cache/texture_cache.h66
15 files changed, 175 insertions, 90 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 3dfba8197..5e522e0d2 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1179,6 +1179,7 @@ public:
BitField<0, 1, u32> depth_range_0_1;
BitField<3, 1, u32> depth_clamp_near;
BitField<4, 1, u32> depth_clamp_far;
+ BitField<11, 1, u32> depth_clamp_disabled;
} view_volume_clip_control;
INSERT_UNION_PADDING_WORDS(0x1F);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index cde3a26b9..8dae754d4 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -814,6 +814,10 @@ union Instruction {
} alu_integer;
union {
+ BitField<43, 1, u64> x;
+ } iadd;
+
+ union {
BitField<39, 1, u64> ftz;
BitField<32, 1, u64> saturate;
BitField<49, 2, HalfMerge> merge;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6fe155bcc..f33c4a8f9 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -348,7 +348,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
texture_cache.GuardRenderTargets(true);
- View depth_surface = texture_cache.GetDepthBufferSurface();
+ View depth_surface = texture_cache.GetDepthBufferSurface(true);
const auto& regs = gpu.regs;
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
@@ -357,7 +357,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
FramebufferCacheKey key;
const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
for (std::size_t index = 0; index < colors_count; ++index) {
- View color_surface{texture_cache.GetColorBufferSurface(index)};
+ View color_surface{texture_cache.GetColorBufferSurface(index, true)};
if (!color_surface) {
continue;
}
@@ -381,28 +381,52 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
}
-void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb,
- bool using_stencil_fb) {
+void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
texture_cache.GuardRenderTargets(true);
View color_surface;
- if (using_color_fb) {
+
+ if (using_color) {
+ // Determine if we have to preserve the contents.
+ // First we have to make sure all clear masks are enabled.
+ bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G ||
+ !regs.clear_buffers.B || !regs.clear_buffers.A;
const std::size_t index = regs.clear_buffers.RT;
- color_surface = texture_cache.GetColorBufferSurface(index);
+ if (regs.clear_flags.scissor) {
+ // Then we have to confirm scissor testing clears the whole image.
+ const auto& scissor = regs.scissor_test[0];
+ preserve_contents |= scissor.min_x > 0;
+ preserve_contents |= scissor.min_y > 0;
+ preserve_contents |= scissor.max_x < regs.rt[index].width;
+ preserve_contents |= scissor.max_y < regs.rt[index].height;
+ }
+
+ color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents);
texture_cache.MarkColorBufferInUse(index);
}
+
View depth_surface;
- if (using_depth_fb || using_stencil_fb) {
- depth_surface = texture_cache.GetDepthBufferSurface();
+ if (using_depth_stencil) {
+ bool preserve_contents = false;
+ if (regs.clear_flags.scissor) {
+ // For depth stencil clears we only have to confirm scissor test covers the whole image.
+ const auto& scissor = regs.scissor_test[0];
+ preserve_contents |= scissor.min_x > 0;
+ preserve_contents |= scissor.min_y > 0;
+ preserve_contents |= scissor.max_x < regs.zeta_width;
+ preserve_contents |= scissor.max_y < regs.zeta_height;
+ }
+
+ depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
texture_cache.MarkDepthBufferInUse();
}
texture_cache.GuardRenderTargets(false);
FramebufferCacheKey key;
- key.colors[0] = color_surface;
- key.zeta = depth_surface;
+ key.colors[0] = std::move(color_surface);
+ key.zeta = std::move(depth_surface);
state_tracker.NotifyFramebuffer();
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
@@ -422,8 +446,7 @@ void RasterizerOpenGL::Clear() {
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) {
use_color = true;
- }
- if (use_color) {
+
state_tracker.NotifyColorMask0();
glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
@@ -461,7 +484,7 @@ void RasterizerOpenGL::Clear() {
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
- ConfigureClearFramebuffer(use_color, use_depth, use_stencil);
+ ConfigureClearFramebuffer(use_color, use_depth || use_stencil);
if (use_color) {
glClearBufferfv(GL_COLOR, 0, regs.clear_color);
@@ -999,11 +1022,7 @@ void RasterizerOpenGL::SyncDepthClamp() {
}
flags[Dirty::DepthClampEnabled] = false;
- const auto& state = gpu.regs.view_volume_clip_control;
- UNIMPLEMENTED_IF_MSG(state.depth_clamp_far != state.depth_clamp_near,
- "Unimplemented depth clamp separation!");
-
- oglEnable(GL_DEPTH_CLAMP, state.depth_clamp_far || state.depth_clamp_near);
+ oglEnable(GL_DEPTH_CLAMP, gpu.regs.view_volume_clip_control.depth_clamp_disabled == 0);
}
void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index ebd2173eb..87249fb6f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -95,7 +95,8 @@ private:
/// Configures the color and depth framebuffer states.
void ConfigureFramebuffers();
- void ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, bool using_stencil_fb);
+ /// Configures the color and depth framebuffer for clearing.
+ void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
/// Configures the current constbuffers to use for the draw command.
void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 0cd3ad7e1..3803a6f3a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1870,6 +1870,14 @@ private:
return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type);
}
+ Expression LogicalAddCarry(Operation operation) {
+ const std::string carry = code.GenerateTemporary();
+ code.AddLine("uint {};", carry);
+ code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(),
+ VisitOperand(operation, 1).AsUint(), carry);
+ return {fmt::format("({} != 0)", carry), Type::Bool};
+ }
+
Expression LogicalFIsNan(Operation operation) {
return GenerateUnary(operation, "isnan", Type::Bool, Type::Float);
}
@@ -2441,6 +2449,8 @@ private:
&GLSLDecompiler::LogicalNotEqual<Type::Uint>,
&GLSLDecompiler::LogicalGreaterEqual<Type::Uint>,
+ &GLSLDecompiler::LogicalAddCarry,
+
&GLSLDecompiler::Logical2HLessThan<false>,
&GLSLDecompiler::Logical2HEqual<false>,
&GLSLDecompiler::Logical2HLessEqual<false>,
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index a7f256ff9..648b1e71b 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -81,7 +81,7 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0);
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
- depth_clamp_enable.Assign(clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1 ? 1 : 0);
+ depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value());
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
cull_face.Assign(PackCullFace(regs.cull_face));
front_face.Assign(packed_front_face);
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 77188b862..8652067a7 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -153,7 +153,7 @@ struct FixedPipelineState {
BitField<4, 1, u32> primitive_restart_enable;
BitField<5, 1, u32> cull_enable;
BitField<6, 1, u32> depth_bias_enable;
- BitField<7, 1, u32> depth_clamp_enable;
+ BitField<7, 1, u32> depth_clamp_disabled;
BitField<8, 1, u32> ndc_minus_one_to_one;
BitField<9, 2, u32> cull_face;
BitField<11, 1, u32> front_face;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 45bd1fc6c..852a17a70 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -249,7 +249,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
rasterization_ci.pNext = nullptr;
rasterization_ci.flags = 0;
- rasterization_ci.depthClampEnable = rs.depth_clamp_enable;
+ rasterization_ci.depthClampEnable = rs.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE;
rasterization_ci.rasterizerDiscardEnable = VK_FALSE;
rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL;
rasterization_ci.cullMode =
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index c821b1229..776053de5 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -656,7 +656,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
Texceptions texceptions;
for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
if (update_rendertargets) {
- color_attachments[rt] = texture_cache.GetColorBufferSurface(rt);
+ color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
}
if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
texceptions[rt] = true;
@@ -664,7 +664,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
}
if (update_rendertargets) {
- zeta_attachment = texture_cache.GetDepthBufferSurface();
+ zeta_attachment = texture_cache.GetDepthBufferSurface(true);
}
if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
texceptions[ZETA_TEXCEPTION_INDEX] = true;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index aaa138f52..20b6ca0ad 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1584,6 +1584,15 @@ private:
return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat};
}
+ Expression LogicalAddCarry(Operation operation) {
+ const Id op_a = AsUint(Visit(operation[0]));
+ const Id op_b = AsUint(Visit(operation[1]));
+
+ const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
+ const Id carry = OpCompositeExtract(t_uint, result, 1);
+ return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool};
+ }
+
Expression LogicalAssign(Operation operation) {
const Node& dest = operation[0];
const Node& src = operation[1];
@@ -2518,6 +2527,8 @@ private:
&SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>,
&SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>,
+ &SPIRVDecompiler::LogicalAddCarry,
+
&SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>,
&SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>,
&SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>,
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 9af8c606d..a041519b7 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -35,15 +35,38 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
case OpCode::Id::IADD_C:
case OpCode::Id::IADD_R:
case OpCode::Id::IADD_IMM: {
- UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented");
+ UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT");
+ UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC");
op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
- const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
+ Node value = Operation(OperationCode::UAdd, op_a, op_b);
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
+ if (instr.iadd.x) {
+ Node carry = GetInternalFlag(InternalFlag::Carry);
+ Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0));
+ value = Operation(OperationCode::UAdd, std::move(value), std::move(x));
+ }
+
+ if (instr.generates_cc) {
+ const Node i0 = Immediate(0);
+
+ Node zero = Operation(OperationCode::LogicalIEqual, value, i0);
+ Node sign = Operation(OperationCode::LogicalILessThan, value, i0);
+ Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b);
+
+ Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0);
+ Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0);
+ Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b));
+ Node overflow = Operation(OperationCode::LogicalAnd, pos, sign);
+
+ SetInternalFlag(bb, InternalFlag::Zero, std::move(zero));
+ SetInternalFlag(bb, InternalFlag::Sign, std::move(sign));
+ SetInternalFlag(bb, InternalFlag::Carry, std::move(carry));
+ SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow));
+ }
+ SetRegister(bb, instr.gpr0, std::move(value));
break;
}
case OpCode::Id::IADD3_C:
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
index 8d54cce34..6116c31aa 100644
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <utility>
+
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
@@ -10,20 +12,20 @@
namespace VideoCommon::Shader {
+using std::move;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
namespace {
-constexpr u64 NUM_PROGRAMMABLE_PREDICATES = 7;
-}
+constexpr u64 NUM_CONDITION_CODES = 4;
+constexpr u64 NUM_PREDICATES = 7;
+} // namespace
u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- UNIMPLEMENTED_IF(instr.p2r_r2p.mode != Tegra::Shader::R2pMode::Pr);
-
- const Node apply_mask = [&] {
+ Node apply_mask = [this, opcode, instr] {
switch (opcode->get().GetId()) {
case OpCode::Id::R2P_IMM:
case OpCode::Id::P2R_IMM:
@@ -34,39 +36,43 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
}
}();
- const auto offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;
+ const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;
+
+ const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc;
+ const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES;
+ const auto get_entry = [this, cc](u64 entry) {
+ return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry);
+ };
switch (opcode->get().GetId()) {
case OpCode::Id::R2P_IMM: {
- const Node mask = GetRegister(instr.gpr8);
+ Node mask = GetRegister(instr.gpr8);
- for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) {
- const auto shift = static_cast<u32>(pred);
+ for (u64 entry = 0; entry < num_entries; ++entry) {
+ const u32 shift = static_cast<u32>(entry);
- const Node apply_compare = BitfieldExtract(apply_mask, shift, 1);
- const Node condition =
- Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0));
+ Node apply = BitfieldExtract(apply_mask, shift, 1);
+ Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0));
- const Node value_compare = BitfieldExtract(mask, offset + shift, 1);
- const Node value =
- Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0));
+ Node compare = BitfieldExtract(mask, offset + shift, 1);
+ Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0));
- const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
- bb.push_back(Conditional(condition, {code}));
+ Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value));
+ bb.push_back(Conditional(condition, {move(code)}));
}
break;
}
case OpCode::Id::P2R_IMM: {
Node value = Immediate(0);
- for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) {
- Node bit = Operation(OperationCode::Select, GetPredicate(pred), Immediate(1U << pred),
+ for (u64 entry = 0; entry < num_entries; ++entry) {
+ Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry),
Immediate(0));
- value = Operation(OperationCode::UBitwiseOr, std::move(value), std::move(bit));
+ value = Operation(OperationCode::UBitwiseOr, move(value), move(bit));
}
- value = Operation(OperationCode::UBitwiseAnd, std::move(value), apply_mask);
- value = BitfieldInsert(GetRegister(instr.gpr8), std::move(value), offset, 8);
+ value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask);
+ value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8);
- SetRegister(bb, instr.gpr0, std::move(value));
+ SetRegister(bb, instr.gpr0, move(value));
break;
}
default:
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 3eee961f5..3f5a7bc7a 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -132,6 +132,8 @@ enum class OperationCode {
LogicalUNotEqual, /// (uint a, uint b) -> bool
LogicalUGreaterEqual, /// (uint a, uint b) -> bool
+ LogicalAddCarry, /// (uint a, uint b) -> bool
+
Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 513e9bf49..eb97bfd41 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -153,21 +153,13 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co
if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
return {};
}
- s64 current_cursor = cursor;
- while (current_cursor > 0) {
- // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
- // register that it uses as operand
- const auto [source, new_cursor] = TrackRegister(gpr, code, current_cursor - 1);
- current_cursor = new_cursor;
- if (!source) {
- continue;
- }
- const auto [base_address, index, offset] = TrackCbuf(source, code, current_cursor);
- if (base_address != nullptr) {
- return {base_address, index, offset};
- }
+ // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
+ // register that it uses as operand
+ const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
+ if (!source) {
+ return {};
}
- return {};
+ return TrackCbuf(source, code, new_cursor);
}
if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index cf6bd005a..d2d2846e6 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -143,7 +143,7 @@ public:
}
const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
- const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false);
+ const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
if (guard_samplers) {
sampled_textures.push_back(surface);
}
@@ -163,7 +163,7 @@ public:
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
- const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false);
+ const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
if (guard_samplers) {
sampled_textures.push_back(surface);
}
@@ -178,7 +178,7 @@ public:
return any_rt;
}
- TView GetDepthBufferSurface() {
+ TView GetDepthBufferSurface(bool preserve_contents) {
std::lock_guard lock{mutex};
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
@@ -199,7 +199,7 @@ public:
return {};
}
const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
- auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, true);
+ auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
depth_buffer.target = surface_view.first;
@@ -209,7 +209,7 @@ public:
return surface_view.second;
}
- TView GetColorBufferSurface(std::size_t index) {
+ TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
std::lock_guard lock{mutex};
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
auto& maxwell3d = system.GPU().Maxwell3D();
@@ -239,8 +239,9 @@ public:
return {};
}
- auto surface_view = GetSurface(gpu_addr, *cpu_addr,
- SurfaceParams::CreateForFramebuffer(system, index), true);
+ auto surface_view =
+ GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
+ preserve_contents, true);
if (render_targets[index].target) {
auto& surface = render_targets[index].target;
surface->MarkAsRenderTarget(false, NO_RT);
@@ -300,9 +301,9 @@ public:
const std::optional<VAddr> src_cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
std::pair<TSurface, TView> dst_surface =
- GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, false);
+ GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
std::pair<TSurface, TView> src_surface =
- GetSurface(src_gpu_addr, *src_cpu_addr, src_params, false);
+ GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
ImageBlit(src_surface.second, dst_surface.second, copy_config);
dst_surface.first->MarkAsModified(true, Tick());
}
@@ -532,18 +533,22 @@ private:
* @param overlaps The overlapping surfaces registered in the cache.
* @param params The parameters for the new surface.
* @param gpu_addr The starting address of the new surface.
+ * @param preserve_contents Indicates that the new surface should be loaded from memory or left
+ * blank.
* @param untopological Indicates to the recycler that the texture has no way to match the
* overlaps due to topological reasons.
**/
std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
const SurfaceParams& params, const GPUVAddr gpu_addr,
+ const bool preserve_contents,
const MatchTopologyResult untopological) {
+ const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
for (auto& surface : overlaps) {
Unregister(surface);
}
switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
case RecycleStrategy::Ignore: {
- return InitializeSurface(gpu_addr, params, Settings::IsGPULevelExtreme());
+ return InitializeSurface(gpu_addr, params, do_load);
}
case RecycleStrategy::Flush: {
std::sort(overlaps.begin(), overlaps.end(),
@@ -553,7 +558,7 @@ private:
for (auto& surface : overlaps) {
FlushSurface(surface);
}
- return InitializeSurface(gpu_addr, params);
+ return InitializeSurface(gpu_addr, params, preserve_contents);
}
case RecycleStrategy::BufferCopy: {
auto new_surface = GetUncachedSurface(gpu_addr, params);
@@ -562,7 +567,7 @@ private:
}
default: {
UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
- return InitializeSurface(gpu_addr, params);
+ return InitializeSurface(gpu_addr, params, do_load);
}
}
}
@@ -700,11 +705,14 @@ private:
* @param params The parameters on the new surface.
* @param gpu_addr The starting address of the new surface.
* @param cpu_addr The starting address of the new surface on physical memory.
+ * @param preserve_contents Indicates that the new surface should be loaded from memory or
+ * left blank.
*/
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
const SurfaceParams& params,
const GPUVAddr gpu_addr,
- const VAddr cpu_addr) {
+ const VAddr cpu_addr,
+ bool preserve_contents) {
if (params.target == SurfaceTarget::Texture3D) {
bool failed = false;
if (params.num_levels > 1) {
@@ -754,7 +762,7 @@ private:
return std::nullopt;
}
Unregister(surface);
- return InitializeSurface(gpu_addr, params);
+ return InitializeSurface(gpu_addr, params, preserve_contents);
}
return std::nullopt;
}
@@ -765,7 +773,7 @@ private:
return {{surface, surface->GetMainView()}};
}
}
- return InitializeSurface(gpu_addr, params);
+ return InitializeSurface(gpu_addr, params, preserve_contents);
}
}
@@ -788,10 +796,13 @@ private:
*
* @param gpu_addr The starting address of the candidate surface.
* @param params The parameters on the candidate surface.
+ * @param preserve_contents Indicates that the new surface should be loaded from memory or
+ * left blank.
* @param is_render Whether or not the surface is a render target.
**/
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
- const SurfaceParams& params, bool is_render) {
+ const SurfaceParams& params, bool preserve_contents,
+ bool is_render) {
// Step 1
// Check Level 1 Cache for a fast structural match. If candidate surface
// matches at certain level we are pretty much done.
@@ -800,7 +811,8 @@ private:
const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
std::vector<TSurface> overlaps{current_surface};
- return RecycleSurface(overlaps, params, gpu_addr, topological_result);
+ return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
+ topological_result);
}
const auto struct_result = current_surface->MatchesStructure(params);
@@ -825,7 +837,7 @@ private:
// If none are found, we are done. we just load the surface and create it.
if (overlaps.empty()) {
- return InitializeSurface(gpu_addr, params);
+ return InitializeSurface(gpu_addr, params, preserve_contents);
}
// Step 3
@@ -835,13 +847,15 @@ private:
for (const auto& surface : overlaps) {
const auto topological_result = surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
- return RecycleSurface(overlaps, params, gpu_addr, topological_result);
+ return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
+ topological_result);
}
}
// Check if it's a 3D texture
if (params.block_depth > 0) {
- auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr);
+ auto surface =
+ Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
if (surface) {
return *surface;
}
@@ -861,7 +875,8 @@ private:
return *view;
}
}
- return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch);
+ return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
+ MatchTopologyResult::FullMatch);
}
// Now we check if the candidate is a mipmap/layer of the overlap
std::optional<TView> view =
@@ -885,7 +900,7 @@ private:
pair.first->EmplaceView(params, gpu_addr, candidate_size);
if (mirage_view)
return {pair.first, *mirage_view};
- return RecycleSurface(overlaps, params, gpu_addr,
+ return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
}
return {current_surface, *view};
@@ -901,7 +916,8 @@ private:
}
}
// We failed all the tests, recycle the overlaps into a new texture.
- return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch);
+ return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
+ MatchTopologyResult::FullMatch);
}
/**
@@ -1059,10 +1075,10 @@ private:
}
std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
- bool do_load = true) {
+ bool preserve_contents) {
auto new_surface{GetUncachedSurface(gpu_addr, params)};
Register(new_surface);
- if (do_load) {
+ if (preserve_contents) {
LoadSurface(new_surface);
}
return {new_surface, new_surface->GetMainView()};