summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/shader_bytecode.h8
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp60
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h5
-rw-r--r--src/video_core/shader/decode/half_set.cpp88
-rw-r--r--src/video_core/texture_cache/surface_base.cpp3
8 files changed, 144 insertions, 35 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index e7cb87589..d374b73cf 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -661,6 +661,10 @@ union Instruction {
constexpr Instruction(u64 value) : value{value} {}
constexpr Instruction(const Instruction& instr) : value(instr.value) {}
+ constexpr bool Bit(u64 offset) const {
+ return ((value >> offset) & 1) != 0;
+ }
+
BitField<0, 8, Register> gpr0;
BitField<8, 8, Register> gpr8;
union {
@@ -1874,7 +1878,9 @@ public:
HSETP2_C,
HSETP2_R,
HSETP2_IMM,
+ HSET2_C,
HSET2_R,
+ HSET2_IMM,
POPC_C,
POPC_R,
POPC_IMM,
@@ -2194,7 +2200,9 @@ private:
INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
+ INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),
INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
+ INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 447a19595..b6b6659c1 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -178,7 +178,7 @@ bool IsASTCSupported() {
for (const GLenum format : formats) {
for (const GLenum support : required_support) {
GLint value;
- glGetInternalformativ(GL_TEXTURE_2D, format, support, 1, &value);
+ glGetInternalformativ(target, format, support, 1, &value);
if (value != GL_FULL_SUPPORT) {
return false;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 46e780a06..c6a3bf3a1 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -460,8 +460,9 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const u8* host_ptr_b = memory_manager.GetPointer(address_b);
code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false);
}
+ const std::size_t code_size = code.size() * sizeof(u64);
- const auto unique_identifier = GetUniqueIdentifier(
+ const u64 unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
const ShaderParameters params{system, disk_cache, device,
@@ -477,7 +478,7 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
Shader* const result = shader.get();
if (cpu_addr) {
- Register(std::move(shader), *cpu_addr, code.size() * sizeof(u64));
+ Register(std::move(shader), *cpu_addr, code_size);
} else {
null_shader = std::move(shader);
}
@@ -495,8 +496,9 @@ Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
const auto host_ptr{memory_manager.GetPointer(code_addr)};
// No kernel found, create a new one
- auto code{GetShaderCode(memory_manager, code_addr, host_ptr, true)};
- const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
+ ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)};
+ const std::size_t code_size{code.size() * sizeof(u64)};
+ const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
const ShaderParameters params{system, disk_cache, device,
*cpu_addr, host_ptr, unique_identifier};
@@ -511,7 +513,7 @@ Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
Shader* const result = kernel.get();
if (cpu_addr) {
- Register(std::move(kernel), *cpu_addr, code.size() * sizeof(u64));
+ Register(std::move(kernel), *cpu_addr, code_size);
} else {
null_kernel = std::move(kernel);
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 6848f1388..994aaeaf2 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -37,7 +37,6 @@ namespace OpenGL {
class Device;
class RasterizerOpenGL;
-struct UnspecializedShader;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index e3714ee6d..a8d94eac3 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -143,6 +143,49 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
}
}
+/// @brief Determine if an attachment to be updated has to preserve contents
+/// @param is_clear True when a clear is being executed
+/// @param regs 3D registers
+/// @return True when the contents have to be preserved
+bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) {
+ if (!is_clear) {
+ return true;
+ }
+ // First we have to make sure all clear masks are enabled.
+ if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B ||
+ !regs.clear_buffers.A) {
+ return true;
+ }
+ // If scissors are disabled, the whole screen is cleared
+ if (!regs.clear_flags.scissor) {
+ return false;
+ }
+ // Then we have to confirm scissor testing clears the whole image
+ const std::size_t index = regs.clear_buffers.RT;
+ const auto& scissor = regs.scissor_test[0];
+ return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width ||
+ scissor.max_y < regs.rt[index].height;
+}
+
+/// @brief Determine if an attachment to be updated has to preserve contents
+/// @param is_clear True when a clear is being executed
+/// @param regs 3D registers
+/// @return True when the contents have to be preserved
+bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) {
+ // If we are not clearing, the contents have to be preserved
+ if (!is_clear) {
+ return true;
+ }
+ // For depth stencil clears we only have to confirm scissor test covers the whole image
+ if (!regs.clear_flags.scissor) {
+ return false;
+ }
+ // Make sure the clear cover the whole image
+ const auto& scissor = regs.scissor_test[0];
+ return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width ||
+ scissor.max_y < regs.zeta_height;
+}
+
} // Anonymous namespace
class BufferBindings final {
@@ -344,7 +387,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
buffer_cache.Unmap();
- const Texceptions texceptions = UpdateAttachments();
+ const Texceptions texceptions = UpdateAttachments(false);
SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
key.renderpass_params = GetRenderPassParams(texceptions);
@@ -400,7 +443,7 @@ void RasterizerVulkan::Clear() {
return;
}
- [[maybe_unused]] const auto texceptions = UpdateAttachments();
+ [[maybe_unused]] const auto texceptions = UpdateAttachments(true);
DEBUG_ASSERT(texceptions.none());
SetupImageTransitions(0, color_attachments, zeta_attachment);
@@ -677,9 +720,12 @@ void RasterizerVulkan::FlushWork() {
draw_counter = 0;
}
-RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
+RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
MICROPROFILE_SCOPE(Vulkan_RenderTargets);
- auto& dirty = system.GPU().Maxwell3D().dirty.flags;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ auto& dirty = maxwell3d.dirty.flags;
+ auto& regs = maxwell3d.regs;
+
const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
dirty[VideoCommon::Dirty::RenderTargets] = false;
@@ -688,7 +734,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
Texceptions texceptions;
for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
if (update_rendertargets) {
- color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
+ const bool preserve_contents = HasToPreserveColorContents(is_clear, regs);
+ color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents);
}
if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
texceptions[rt] = true;
@@ -696,7 +743,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
}
if (update_rendertargets) {
- zeta_attachment = texture_cache.GetDepthBufferSurface(true);
+ const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs);
+ zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents);
}
if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
texceptions[ZETA_TEXCEPTION_INDEX] = true;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index c8c187606..83e00e7e9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -159,7 +159,10 @@ private:
void FlushWork();
- Texceptions UpdateAttachments();
+ /// @brief Updates the currently bound attachments
+ /// @param is_clear True when the framebuffer is updated as a clear
+ /// @return Bitfield of attachments being used as sampled textures
+ Texceptions UpdateAttachments(bool is_clear);
std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 848e46874..b2e88fa20 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -13,55 +13,101 @@
namespace VideoCommon::Shader {
+using std::move;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
+using Tegra::Shader::PredCondition;
u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- if (instr.hset2.ftz == 0) {
- LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+ PredCondition cond;
+ bool bf;
+ bool ftz;
+ bool neg_a;
+ bool abs_a;
+ bool neg_b;
+ bool abs_b;
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::HSET2_C:
+ case OpCode::Id::HSET2_IMM:
+ cond = instr.hsetp2.cbuf_and_imm.cond;
+ bf = instr.Bit(53);
+ ftz = instr.Bit(54);
+ neg_a = instr.Bit(43);
+ abs_a = instr.Bit(44);
+ neg_b = instr.Bit(56);
+ abs_b = instr.Bit(54);
+ break;
+ case OpCode::Id::HSET2_R:
+ cond = instr.hsetp2.reg.cond;
+ bf = instr.Bit(49);
+ ftz = instr.Bit(50);
+ neg_a = instr.Bit(43);
+ abs_a = instr.Bit(44);
+ neg_b = instr.Bit(31);
+ abs_b = instr.Bit(30);
+ break;
+ default:
+ UNREACHABLE();
}
- Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
- op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
-
- Node op_b = [&]() {
+ Node op_b = [this, instr, opcode] {
switch (opcode->get().GetId()) {
+ case OpCode::Id::HSET2_C:
+ // Inform as unimplemented as this is not tested.
+ UNIMPLEMENTED_MSG("HSET2_C is not implemented");
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
case OpCode::Id::HSET2_R:
return GetRegister(instr.gpr20);
+ case OpCode::Id::HSET2_IMM:
+ return UnpackHalfImmediate(instr, true);
default:
UNREACHABLE();
- return Immediate(0);
+ return Node{};
}
}();
- op_b = UnpackHalfFloat(op_b, instr.hset2.type_b);
- op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);
- const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
+ if (!ftz) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+ }
+
+ Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
+ op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
+
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::HSET2_R:
+ op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
+ [[fallthrough]];
+ case OpCode::Id::HSET2_C:
+ op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
+ break;
+ default:
+ break;
+ }
- const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b);
+ Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
+
+ Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
// HSET2 operates on each half float in the pack.
std::array<Node, 2> values;
for (u32 i = 0; i < 2; ++i) {
- const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff;
- const Node true_value = Immediate(raw_value << (i * 16));
- const Node false_value = Immediate(0);
-
- const Node comparison =
- Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
- const Node predicate = Operation(combiner, comparison, second_pred);
+ const u32 raw_value = bf ? 0x3c00 : 0xffff;
+ Node true_value = Immediate(raw_value << (i * 16));
+ Node false_value = Immediate(0);
+ Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
+ Node predicate = Operation(combiner, comparison, second_pred);
values[i] =
- Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value);
+ Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
}
- const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]);
- SetRegister(bb, instr.gpr0, value);
+ Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
+ SetRegister(bb, instr.gpr0, move(value));
return pc;
}
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 94d3a6ae5..0caf3b4f0 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -120,6 +120,9 @@ std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
}
const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
const auto layer{static_cast<u32>(relative_address / layer_size)};
+ if (layer >= params.depth) {
+ return {};
+ }
const GPUVAddr mipmap_address = relative_address - layer_size * layer;
const auto mipmap_it =
Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);