summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt14
-rw-r--r--src/video_core/engines/kepler_compute.cpp34
-rw-r--r--src/video_core/engines/kepler_compute.h (renamed from src/video_core/engines/maxwell_compute.h)31
-rw-r--r--src/video_core/engines/maxwell_3d.cpp2
-rw-r--r--src/video_core/engines/maxwell_compute.cpp28
-rw-r--r--src/video_core/engines/shader_bytecode.h2
-rw-r--r--src/video_core/gpu.cpp11
-rw-r--r--src/video_core/gpu.h7
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp136
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp6
-rw-r--r--src/video_core/renderer_vulkan/declarations.h45
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp231
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h116
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp4
-rw-r--r--src/video_core/shader/decode/conversion.cpp6
-rw-r--r--src/video_core/shader/decode/memory.cpp159
-rw-r--r--src/video_core/shader/shader_ir.h22
-rw-r--r--src/video_core/surface.cpp2
18 files changed, 626 insertions, 230 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 33e507e69..d35a738d5 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -5,12 +5,12 @@ add_library(video_core STATIC
debug_utils/debug_utils.h
engines/fermi_2d.cpp
engines/fermi_2d.h
+ engines/kepler_compute.cpp
+ engines/kepler_compute.h
engines/kepler_memory.cpp
engines/kepler_memory.h
engines/maxwell_3d.cpp
engines/maxwell_3d.h
- engines/maxwell_compute.cpp
- engines/maxwell_compute.h
engines/maxwell_dma.cpp
engines/maxwell_dma.h
engines/shader_bytecode.h
@@ -101,6 +101,16 @@ add_library(video_core STATIC
video_core.h
)
+if (ENABLE_VULKAN)
+ target_sources(video_core PRIVATE
+ renderer_vulkan/declarations.h
+ renderer_vulkan/vk_device.cpp
+ renderer_vulkan/vk_device.h)
+
+ target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
+ target_compile_definitions(video_core PRIVATE HAS_VULKAN)
+endif()
+
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
new file mode 100644
index 000000000..4ca856b6b
--- /dev/null
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -0,0 +1,34 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "core/memory.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra::Engines {
+
+KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}
+
+KeplerCompute::~KeplerCompute() = default;
+
+void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
+ ASSERT_MSG(method_call.method < Regs::NUM_REGS,
+ "Invalid KeplerCompute register, increase the size of the Regs structure");
+
+ regs.reg_array[method_call.method] = method_call.argument;
+
+ switch (method_call.method) {
+ case KEPLER_COMPUTE_REG_INDEX(launch):
+ // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
+ // kernels)
+ UNREACHABLE_MSG("Compute shaders are not implemented");
+ break;
+ default:
+ break;
+ }
+}
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_compute.h b/src/video_core/engines/kepler_compute.h
index 1d71f11bd..df0a32e0f 100644
--- a/src/video_core/engines/maxwell_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -10,47 +10,48 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
namespace Tegra::Engines {
-#define MAXWELL_COMPUTE_REG_INDEX(field_name) \
- (offsetof(Tegra::Engines::MaxwellCompute::Regs, field_name) / sizeof(u32))
+#define KEPLER_COMPUTE_REG_INDEX(field_name) \
+ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
-class MaxwellCompute final {
+class KeplerCompute final {
public:
- MaxwellCompute() = default;
- ~MaxwellCompute() = default;
+ explicit KeplerCompute(MemoryManager& memory_manager);
+ ~KeplerCompute();
+
+ static constexpr std::size_t NumConstBuffers = 8;
struct Regs {
static constexpr std::size_t NUM_REGS = 0xCF8;
union {
struct {
- INSERT_PADDING_WORDS(0x281);
+ INSERT_PADDING_WORDS(0xAF);
- union {
- u32 compute_end;
- BitField<0, 1, u32> unknown;
- } compute;
+ u32 launch;
- INSERT_PADDING_WORDS(0xA76);
+ INSERT_PADDING_WORDS(0xC48);
};
std::array<u32, NUM_REGS> reg_array;
};
} regs{};
-
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
- "MaxwellCompute Regs has wrong size");
+ "KeplerCompute Regs has wrong size");
+
+ MemoryManager& memory_manager;
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
};
#define ASSERT_REG_POSITION(field_name, position) \
- static_assert(offsetof(MaxwellCompute::Regs, field_name) == position * 4, \
+ static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
-ASSERT_REG_POSITION(compute, 0x281);
+ASSERT_REG_POSITION(launch, 0xAF);
#undef ASSERT_REG_POSITION
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 10eae6a65..19b6b14b2 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -317,7 +317,7 @@ void Maxwell3D::ProcessQueryGet() {
LongQueryResult query_result{};
query_result.value = result;
// TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
- query_result.timestamp = CoreTiming::GetTicks();
+ query_result.timestamp = Core::Timing::GetTicks();
Memory::WriteBlock(*address, &query_result, sizeof(query_result));
}
dirty_flags.OnMemoryWrite();
diff --git a/src/video_core/engines/maxwell_compute.cpp b/src/video_core/engines/maxwell_compute.cpp
deleted file mode 100644
index 656db6a61..000000000
--- a/src/video_core/engines/maxwell_compute.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/logging/log.h"
-#include "core/core.h"
-#include "video_core/engines/maxwell_compute.h"
-
-namespace Tegra::Engines {
-
-void MaxwellCompute::CallMethod(const GPU::MethodCall& method_call) {
- ASSERT_MSG(method_call.method < Regs::NUM_REGS,
- "Invalid MaxwellCompute register, increase the size of the Regs structure");
-
- regs.reg_array[method_call.method] = method_call.argument;
-
- switch (method_call.method) {
- case MAXWELL_COMPUTE_REG_INDEX(compute): {
- LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented");
- UNREACHABLE();
- break;
- }
- default:
- break;
- }
-}
-
-} // namespace Tegra::Engines
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 269df9437..1f425f90b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -186,7 +186,7 @@ enum class SubOp : u64 {
};
enum class F2iRoundingOp : u64 {
- None = 0,
+ RoundEven = 0,
Floor = 1,
Ceil = 2,
Trunc = 3,
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index d3d32a359..3d00c308b 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -6,9 +6,9 @@
#include "core/core_timing.h"
#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/maxwell_compute.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
#include "video_core/rasterizer_interface.h"
@@ -18,6 +18,7 @@ namespace Tegra {
u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
switch (format) {
case PixelFormat::ABGR8:
+ case PixelFormat::BGRA8:
return 4;
default:
return 4;
@@ -31,7 +32,7 @@ GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
- maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
+ kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager);
}
@@ -245,8 +246,8 @@ void GPU::CallEngineMethod(const MethodCall& method_call) {
case EngineID::MAXWELL_B:
maxwell_3d->CallMethod(method_call);
break;
- case EngineID::MAXWELL_COMPUTE_B:
- maxwell_compute->CallMethod(method_call);
+ case EngineID::KEPLER_COMPUTE_B:
+ kepler_compute->CallMethod(method_call);
break;
case EngineID::MAXWELL_DMA_COPY_A:
maxwell_dma->CallMethod(method_call);
@@ -282,7 +283,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
- block.timestamp = CoreTiming::GetTicks();
+ block.timestamp = Core::Timing::GetTicks();
Memory::WriteBlock(*address, &block, sizeof(block));
} else {
const auto address =
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index fb8975811..a482196ea 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -80,6 +80,7 @@ class DebugContext;
struct FramebufferConfig {
enum class PixelFormat : u32 {
ABGR8 = 1,
+ BGRA8 = 5,
};
/**
@@ -102,15 +103,15 @@ struct FramebufferConfig {
namespace Engines {
class Fermi2D;
class Maxwell3D;
-class MaxwellCompute;
class MaxwellDMA;
+class KeplerCompute;
class KeplerMemory;
} // namespace Engines
enum class EngineID {
FERMI_TWOD_A = 0x902D, // 2D Engine
MAXWELL_B = 0xB197, // 3D Engine
- MAXWELL_COMPUTE_B = 0xB1C0,
+ KEPLER_COMPUTE_B = 0xB1C0,
KEPLER_INLINE_TO_MEMORY_B = 0xA140,
MAXWELL_DMA_COPY_A = 0xB0B5,
};
@@ -208,7 +209,7 @@ private:
/// 2D engine
std::unique_ptr<Engines::Fermi2D> fermi_2d;
/// Compute engine
- std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
+ std::unique_ptr<Engines::KeplerCompute> kepler_compute;
/// DMA engine
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
/// Inline memory engine
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 70e124dc4..db18f4dbe 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -616,17 +616,8 @@ private:
std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
std::string value = VisitOperand(operation, operand_index);
-
switch (type) {
- case Type::Bool:
- case Type::Bool2:
- case Type::Float:
- return value;
- case Type::Int:
- return "ftoi(" + value + ')';
- case Type::Uint:
- return "ftou(" + value + ')';
- case Type::HalfFloat:
+ case Type::HalfFloat: {
const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta());
if (!half_meta) {
value = "toHalf2(" + value + ')';
@@ -643,6 +634,26 @@ private:
return "vec2(toHalf2(" + value + ")[1])";
}
}
+ default:
+ return CastOperand(value, type);
+ }
+ }
+
+ std::string CastOperand(const std::string& value, Type type) const {
+ switch (type) {
+ case Type::Bool:
+ case Type::Bool2:
+ case Type::Float:
+ return value;
+ case Type::Int:
+ return "ftoi(" + value + ')';
+ case Type::Uint:
+ return "ftou(" + value + ')';
+ case Type::HalfFloat:
+ // Can't be handled as a stand-alone value
+ UNREACHABLE();
+ return value;
+ }
UNREACHABLE();
return value;
}
@@ -650,6 +661,7 @@ private:
std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) {
switch (type) {
case Type::Bool:
+ case Type::Bool2:
case Type::Float:
if (needs_parenthesis) {
return '(' + value + ')';
@@ -719,45 +731,51 @@ private:
constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- const auto count = static_cast<u32>(operation.GetOperandsCount());
ASSERT(meta);
+ const std::size_t count = operation.GetOperandsCount();
+ const bool has_array = meta->sampler.IsArray();
+ const bool has_shadow = meta->sampler.IsShadow();
+
std::string expr = func;
expr += '(';
expr += GetSampler(meta->sampler);
expr += ", ";
- expr += coord_constructors[meta->coords_count - 1];
+ expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
expr += '(';
- for (u32 i = 0; i < count; ++i) {
- const bool is_extra = i >= meta->coords_count;
- const bool is_array = i == meta->array_index;
-
- std::string operand = [&]() {
- if (is_extra && is_extra_int) {
- if (const auto immediate = std::get_if<ImmediateNode>(operation[i])) {
- return std::to_string(static_cast<s32>(immediate->GetValue()));
- } else {
- return "ftoi(" + Visit(operation[i]) + ')';
- }
- } else {
- return Visit(operation[i]);
- }
- }();
- if (is_array) {
- ASSERT(!is_extra);
- operand = "float(ftoi(" + operand + "))";
- }
+ for (std::size_t i = 0; i < count; ++i) {
+ expr += Visit(operation[i]);
- expr += operand;
-
- if (i + 1 == meta->coords_count) {
- expr += ')';
- }
- if (i + 1 < count) {
+ const std::size_t next = i + 1;
+ if (next < count || has_array || has_shadow)
+ expr += ", ";
+ }
+ if (has_array) {
+ expr += "float(ftoi(" + Visit(meta->array) + "))";
+ }
+ if (has_shadow) {
+ if (has_array)
expr += ", ";
+ expr += Visit(meta->depth_compare);
+ }
+ expr += ')';
+
+ for (const Node extra : meta->extras) {
+ expr += ", ";
+ if (is_extra_int) {
+ if (const auto immediate = std::get_if<ImmediateNode>(extra)) {
+ // Inline the string as an immediate integer in GLSL (some extra arguments are
+ // required to be constant)
+ expr += std::to_string(static_cast<s32>(immediate->GetValue()));
+ } else {
+ expr += "ftoi(" + Visit(extra) + ')';
+ }
+ } else {
+ expr += Visit(extra);
}
}
+
expr += ')';
return expr;
}
@@ -1134,7 +1152,7 @@ private:
Type::HalfFloat);
}
- std::string F4Texture(Operation operation) {
+ std::string Texture(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1145,7 +1163,7 @@ private:
return expr + GetSwizzle(meta->element);
}
- std::string F4TextureLod(Operation operation) {
+ std::string TextureLod(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1156,7 +1174,7 @@ private:
return expr + GetSwizzle(meta->element);
}
- std::string F4TextureGather(Operation operation) {
+ std::string TextureGather(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1164,7 +1182,7 @@ private:
GetSwizzle(meta->element);
}
- std::string F4TextureQueryDimensions(Operation operation) {
+ std::string TextureQueryDimensions(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1184,7 +1202,7 @@ private:
return "0";
}
- std::string F4TextureQueryLod(Operation operation) {
+ std::string TextureQueryLod(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1195,29 +1213,33 @@ private:
return "0";
}
- std::string F4TexelFetch(Operation operation) {
+ std::string TexelFetch(Operation operation) {
constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- const auto count = static_cast<u32>(operation.GetOperandsCount());
ASSERT(meta);
+ UNIMPLEMENTED_IF(meta->sampler.IsArray());
+ const std::size_t count = operation.GetOperandsCount();
std::string expr = "texelFetch(";
expr += GetSampler(meta->sampler);
expr += ", ";
- expr += constructors[meta->coords_count - 1];
+ expr += constructors.at(operation.GetOperandsCount() - 1);
expr += '(';
- for (u32 i = 0; i < count; ++i) {
+ for (std::size_t i = 0; i < count; ++i) {
expr += VisitOperand(operation, i, Type::Int);
-
- if (i + 1 == meta->coords_count) {
+ const std::size_t next = i + 1;
+ if (next == count)
expr += ')';
- }
- if (i + 1 < count) {
+ else if (next < count)
expr += ", ";
- }
+ }
+ for (std::size_t i = 0; i < meta->extras.size(); ++i) {
+ expr += ", ";
+ expr += CastOperand(Visit(meta->extras.at(i)), Type::Int);
}
expr += ')';
+
return expr + GetSwizzle(meta->element);
}
@@ -1454,12 +1476,12 @@ private:
&GLSLDecompiler::Logical2HNotEqual,
&GLSLDecompiler::Logical2HGreaterEqual,
- &GLSLDecompiler::F4Texture,
- &GLSLDecompiler::F4TextureLod,
- &GLSLDecompiler::F4TextureGather,
- &GLSLDecompiler::F4TextureQueryDimensions,
- &GLSLDecompiler::F4TextureQueryLod,
- &GLSLDecompiler::F4TexelFetch,
+ &GLSLDecompiler::Texture,
+ &GLSLDecompiler::TextureLod,
+ &GLSLDecompiler::TextureGather,
+ &GLSLDecompiler::TextureQueryDimensions,
+ &GLSLDecompiler::TextureQueryLod,
+ &GLSLDecompiler::TexelFetch,
&GLSLDecompiler::Branch,
&GLSLDecompiler::PushFlowStack,
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 6476a9e1a..cca2ed708 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -107,7 +107,7 @@ RendererOpenGL::~RendererOpenGL() = default;
void RendererOpenGL::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
- Core::System::GetInstance().GetPerfStats().EndSystemFrame();
+ system.GetPerfStats().EndSystemFrame();
// Maintain the rasterizer's state as a priority
OpenGLState prev_state = OpenGLState::GetCurState();
@@ -137,8 +137,8 @@ void RendererOpenGL::SwapBuffers(
render_window.PollEvents();
- Core::System::GetInstance().FrameLimiter().DoFrameLimiting(CoreTiming::GetGlobalTimeUs());
- Core::System::GetInstance().GetPerfStats().BeginSystemFrame();
+ system.FrameLimiter().DoFrameLimiting(Core::Timing::GetGlobalTimeUs());
+ system.GetPerfStats().BeginSystemFrame();
// Restore the rasterizer state
prev_state.Apply();
diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h
new file mode 100644
index 000000000..ba25b5bc7
--- /dev/null
+++ b/src/video_core/renderer_vulkan/declarations.h
@@ -0,0 +1,45 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vulkan/vulkan.hpp>
+
+namespace Vulkan {
+
+// vulkan.hpp unique handles default to DispatchLoaderStatic; use DispatchLoaderDynamic instead
+template <typename T>
+using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>;
+
+using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>;
+using UniqueBuffer = UniqueHandle<vk::Buffer>;
+using UniqueBufferView = UniqueHandle<vk::BufferView>;
+using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>;
+using UniqueCommandPool = UniqueHandle<vk::CommandPool>;
+using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>;
+using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>;
+using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>;
+using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>;
+using UniqueDevice = UniqueHandle<vk::Device>;
+using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>;
+using UniqueEvent = UniqueHandle<vk::Event>;
+using UniqueFence = UniqueHandle<vk::Fence>;
+using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>;
+using UniqueImage = UniqueHandle<vk::Image>;
+using UniqueImageView = UniqueHandle<vk::ImageView>;
+using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>;
+using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>;
+using UniquePipeline = UniqueHandle<vk::Pipeline>;
+using UniquePipelineCache = UniqueHandle<vk::PipelineCache>;
+using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>;
+using UniqueQueryPool = UniqueHandle<vk::QueryPool>;
+using UniqueRenderPass = UniqueHandle<vk::RenderPass>;
+using UniqueSampler = UniqueHandle<vk::Sampler>;
+using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>;
+using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
+using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
+using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
+using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
new file mode 100644
index 000000000..78a4e5f0e
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -0,0 +1,231 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <map>
+#include <optional>
+#include <set>
+#include <vector>
+#include "common/assert.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+
+namespace Vulkan {
+
+namespace Alternatives {
+
+constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
+ vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; // {} marks the end of the list
+constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
+ vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; // {} marks the end of the list
+
+} // namespace Alternatives
+
+constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
+ switch (format) {
+ case vk::Format::eD24UnormS8Uint:
+ return Alternatives::Depth24UnormS8Uint.data();
+ case vk::Format::eD16UnormS8Uint:
+ return Alternatives::Depth16UnormS8Uint.data();
+ default:
+ return nullptr;
+ }
+}
+
+constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties,
+ FormatType format_type) {
+ switch (format_type) {
+ case FormatType::Linear:
+ return properties.linearTilingFeatures;
+ case FormatType::Optimal:
+ return properties.optimalTilingFeatures;
+ case FormatType::Buffer:
+ return properties.bufferFeatures;
+ default:
+ return {};
+ }
+}
+
+VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
+ vk::SurfaceKHR surface)
+ : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
+ SetupFamilies(dldi, surface);
+ SetupProperties(dldi);
+}
+
+VKDevice::~VKDevice() = default;
+
+bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
+ const auto queue_cis = GetDeviceQueueCreateInfos();
+ vk::PhysicalDeviceFeatures device_features{};
+
+ const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME};
+ const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
+ 0, nullptr, static_cast<u32>(extensions.size()),
+ extensions.data(), &device_features);
+ vk::Device dummy_logical;
+ if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
+ LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
+ return false;
+ }
+
+ dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr);
+ logical = UniqueDevice(
+ dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
+
+ graphics_queue = logical->getQueue(graphics_family, 0, dld);
+ present_queue = logical->getQueue(present_family, 0, dld);
+ return true;
+}
+
+vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
+ vk::FormatFeatureFlags wanted_usage,
+ FormatType format_type) const {
+ if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
+ return wanted_format;
+ }
+ // The wanted format is not supported by hardware, search for alternatives
+ const vk::Format* alternatives = GetFormatAlternatives(wanted_format);
+ if (alternatives == nullptr) {
+ LOG_CRITICAL(Render_Vulkan,
+ "Format={} with usage={} and type={} has no defined alternatives and host "
+ "hardware does not support it",
+ static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
+ static_cast<u32>(format_type));
+ UNREACHABLE();
+ return wanted_format;
+ }
+
+ std::size_t i = 0;
+ for (vk::Format alternative = alternatives[0]; alternative != vk::Format{};
+ alternative = alternatives[++i]) {
+ if (!IsFormatSupported(alternative, wanted_usage, format_type))
+ continue;
+ LOG_WARNING(Render_Vulkan,
+ "Emulating format={} with alternative format={} with usage={} and type={}",
+ static_cast<u32>(wanted_format), static_cast<u32>(alternative),
+ static_cast<u32>(wanted_usage), static_cast<u32>(format_type));
+ return alternative;
+ }
+
+ // No alternatives found, panic
+ LOG_CRITICAL(Render_Vulkan,
+ "Format={} with usage={} and type={} is not supported by the host hardware and "
+ "doesn't support any of the alternatives",
+ static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
+ static_cast<u32>(format_type));
+ UNREACHABLE();
+ return wanted_format;
+}
+
+bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
+ FormatType format_type) const {
+ const auto it = format_properties.find(wanted_format);
+ if (it == format_properties.end()) {
+ LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}",
+ static_cast<u32>(wanted_format));
+ UNREACHABLE();
+ return true;
+ }
+ const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type);
+ return (supported_usage & wanted_usage) == wanted_usage;
+}
+
+bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
+ vk::SurfaceKHR surface) {
+ const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME;
+
+ bool has_swapchain{};
+ for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
+ has_swapchain |= prop.extensionName == swapchain_extension;
+ }
+ if (!has_swapchain) {
+ // The device doesn't support creating swapchains.
+ return false;
+ }
+
+ bool has_graphics{}, has_present{};
+ const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
+ for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
+ const auto& family = queue_family_properties[i];
+ if (family.queueCount == 0)
+ continue;
+
+ has_graphics |=
+ (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
+ has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
+ }
+ if (!has_graphics || !has_present) {
+ // The device doesn't have a graphics and present queue.
+ return false;
+ }
+
+ // TODO(Rodrigo): Check if the device matches all requirements.
+ const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
+ if (props.limits.maxUniformBufferRange < 65536) {
+ return false;
+ }
+
+ // Device is suitable.
+ return true;
+}
+
+void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
+ std::optional<u32> graphics_family_, present_family_;
+
+ const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
+ for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
+ if (graphics_family_ && present_family_)
+ break;
+
+ const auto& queue_family = queue_family_properties[i];
+ if (queue_family.queueCount == 0)
+ continue;
+
+ if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics)
+ graphics_family_ = i;
+ if (physical.getSurfaceSupportKHR(i, surface, dldi))
+ present_family_ = i;
+ }
+ ASSERT(graphics_family_ && present_family_);
+
+ graphics_family = *graphics_family_;
+ present_family = *present_family_;
+}
+
+void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
+ const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
+ device_type = props.deviceType;
+ uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
+}
+
+std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
+ static const float QUEUE_PRIORITY = 1.f;
+
+ std::set<u32> unique_queue_families = {graphics_family, present_family};
+ std::vector<vk::DeviceQueueCreateInfo> queue_cis;
+
+ for (u32 queue_family : unique_queue_families)
+ queue_cis.push_back({{}, queue_family, 1, &QUEUE_PRIORITY});
+
+ return queue_cis;
+}
+
+std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
+ const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
+ std::map<vk::Format, vk::FormatProperties> format_properties;
+
+ const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) {
+ format_properties.emplace(format, physical.getFormatProperties(format, dldi));
+ };
+ AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
+ AddFormatQuery(vk::Format::eR5G6B5UnormPack16);
+ AddFormatQuery(vk::Format::eD32Sfloat);
+ AddFormatQuery(vk::Format::eD16UnormS8Uint);
+ AddFormatQuery(vk::Format::eD24UnormS8Uint);
+ AddFormatQuery(vk::Format::eD32SfloatS8Uint);
+
+ return format_properties;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
new file mode 100644
index 000000000..e87c7a508
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -0,0 +1,116 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <map>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+/// Format usage descriptor
+enum class FormatType { Linear, Optimal, Buffer };
+
+/// Handles data specific to a physical device.
+class VKDevice final {
+public:
+ explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
+ vk::SurfaceKHR surface);
+ ~VKDevice();
+
+ /// Initializes the device. Returns true on success.
+ bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance);
+
+ /**
+ * Returns a format supported by the device for the passed requirements.
+ * @param wanted_format The ideal format to be returned. It may not be the returned format.
+ * @param wanted_usage The usage that must be fulfilled even if the format is not supported.
+ * @param format_type Format type usage.
+ * @returns A format supported by the device.
+ */
+ vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
+ FormatType format_type) const;
+
+ /// Returns the dispatch loader with direct function pointers of the device
+ const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
+ return dld;
+ }
+
+ /// Returns the logical device
+ vk::Device GetLogical() const {
+ return logical.get();
+ }
+
+ /// Returns the physical device.
+ vk::PhysicalDevice GetPhysical() const {
+ return physical;
+ }
+
+ /// Returns the main graphics queue.
+ vk::Queue GetGraphicsQueue() const {
+ return graphics_queue;
+ }
+
+ /// Returns the main present queue.
+ vk::Queue GetPresentQueue() const {
+ return present_queue;
+ }
+
+ /// Returns main graphics queue family index.
+ u32 GetGraphicsFamily() const {
+ return graphics_family;
+ }
+
+ /// Returns main present queue family index.
+ u32 GetPresentFamily() const {
+ return present_family;
+ }
+
+ /// Returns true if the physical device is an integrated GPU.
+ bool IsIntegrated() const {
+ return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
+ }
+
+ /// Returns uniform buffer alignment requirement
+ u64 GetUniformBufferAlignment() const {
+ return uniform_buffer_alignment;
+ }
+
+ /// Checks if the physical device is suitable.
+ static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
+ vk::SurfaceKHR surface);
+
+private:
+ /// Sets up queue families.
+ void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
+
+ /// Sets up device properties.
+ void SetupProperties(const vk::DispatchLoaderDynamic& dldi);
+
+ /// Returns a list of queue initialization descriptors.
+ std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
+
+ /// Returns true if a format is supported.
+ bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
+ FormatType format_type) const;
+
+ /// Returns the device properties for Vulkan formats.
+ static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
+ const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
+
+ const vk::PhysicalDevice physical; ///< Physical device
+ vk::DispatchLoaderDynamic dld; ///< Device function pointers
+ UniqueDevice logical; ///< Logical device
+ vk::Queue graphics_queue; ///< Main graphics queue
+ vk::Queue present_queue; ///< Main present queue
+ u32 graphics_family{}; ///< Main graphics queue family index
+ u32 present_family{}; ///< Main present queue family index
+ vk::PhysicalDeviceType device_type; ///< Physical device type
+ u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requirement
+ std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 38bb692d6..9fd4b273e 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -41,7 +41,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
- SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
+ SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
@@ -284,4 +284,4 @@ void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Nod
SetRegister(bb, dest, value);
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index a992f73f8..55a6fbbf2 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -118,8 +118,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
value = [&]() {
switch (instr.conversion.f2i.rounding) {
- case Tegra::Shader::F2iRoundingOp::None:
- return value;
+ case Tegra::Shader::F2iRoundingOp::RoundEven:
+ return Operation(OperationCode::FRoundEven, PRECISE, value);
case Tegra::Shader::F2iRoundingOp::Floor:
return Operation(OperationCode::FFloor, PRECISE, value);
case Tegra::Shader::F2iRoundingOp::Ceil:
@@ -146,4 +146,4 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index e006f8138..55ec601ff 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -306,7 +306,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
case OpCode::Id::TLD4S: {
UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
-
if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
}
@@ -315,9 +314,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const Node op_a = GetRegister(instr.gpr8);
const Node op_b = GetRegister(instr.gpr20);
- std::vector<Node> coords;
-
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
+ std::vector<Node> coords;
if (depth_compare) {
// Note: TLD4S coordinate encoding works just like TEXS's
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
@@ -328,18 +326,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
coords.push_back(op_a);
coords.push_back(op_b);
}
- const auto num_coords = static_cast<u32>(coords.size());
- coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
+ std::vector<Node> extras;
+ extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
const auto& sampler =
GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
- auto params = coords;
- MetaTexture meta{sampler, element, num_coords};
- values[element] =
- Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
+ auto coords_copy = coords;
+ MetaTexture meta{sampler, {}, {}, extras, element};
+ values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
WriteTexsInstructionFloat(bb, instr, values);
@@ -360,12 +357,13 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
switch (instr.txq.query_type) {
case Tegra::Shader::TextureQueryType::Dimension: {
for (u32 element = 0; element < 4; ++element) {
- if (instr.txq.IsComponentEnabled(element)) {
- MetaTexture meta{sampler, element};
- const Node value = Operation(OperationCode::F4TextureQueryDimensions,
- std::move(meta), GetRegister(instr.gpr8));
- SetTemporal(bb, indexer++, value);
+ if (!instr.txq.IsComponentEnabled(element)) {
+ continue;
}
+ MetaTexture meta{sampler, {}, {}, {}, element};
+ const Node value =
+ Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
+ SetTemporal(bb, indexer++, value);
}
for (u32 i = 0; i < indexer; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
@@ -412,9 +410,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
for (u32 element = 0; element < 2; ++element) {
auto params = coords;
- MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())};
- const Node value =
- Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params));
+ MetaTexture meta{sampler, {}, {}, {}, element};
+ const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporal(bb, element, value);
}
for (u32 element = 0; element < 2; ++element) {
@@ -432,7 +429,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
+ LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
}
WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
@@ -535,15 +532,16 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
}
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
- TextureProcessMode process_mode, bool depth_compare, bool is_array,
- std::size_t array_offset, std::size_t bias_offset,
- std::vector<Node>&& coords) {
- UNIMPLEMENTED_IF_MSG(
- (texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
- (texture_type == TextureType::TextureCube && is_array && depth_compare),
- "This method is not supported.");
+ TextureProcessMode process_mode, std::vector<Node> coords,
+ Node array, Node depth_compare, u32 bias_offset) {
+ const bool is_array = array;
+ const bool is_shadow = depth_compare;
- const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+ UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
+ (texture_type == TextureType::TextureCube && is_array && is_shadow),
+ "This method is not supported.");
+
+ const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
process_mode == TextureProcessMode::LL ||
@@ -552,35 +550,30 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
// LOD selection (either via bias or explicit textureLod) not supported in GL for
// sampler2DArrayShadow and samplerCubeArrayShadow.
const bool gl_lod_supported =
- !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
- (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));
+ !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
+ (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
const OperationCode read_method =
- lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture;
+ lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture;
UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
- std::optional<u32> array_offset_value;
- if (is_array)
- array_offset_value = static_cast<u32>(array_offset);
-
- const auto coords_count = static_cast<u32>(coords.size());
-
+ std::vector<Node> extras;
if (process_mode != TextureProcessMode::None && gl_lod_supported) {
if (process_mode == TextureProcessMode::LZ) {
- coords.push_back(Immediate(0.0f));
+ extras.push_back(Immediate(0.0f));
} else {
// If present, lod or bias are always stored in the register indexed by the gpr20
// field with an offset depending on the usage of the other registers
- coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
+ extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
}
}
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
- auto params = coords;
- MetaTexture meta{sampler, element, coords_count, array_offset_value};
- values[element] = Operation(read_method, std::move(meta), std::move(params));
+ auto copy_coords = coords;
+ MetaTexture meta{sampler, array, depth_compare, extras, element};
+ values[element] = Operation(read_method, meta, std::move(copy_coords));
}
return values;
@@ -602,28 +595,22 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
for (std::size_t i = 0; i < coord_count; ++i) {
coords.push_back(GetRegister(coord_register + i));
}
- // 1D.DC in opengl the 2nd component is ignored.
+ // For 1D.DC in OpenGL, the 2nd component is ignored.
if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
coords.push_back(Immediate(0.0f));
}
- std::size_t array_offset{};
- if (is_array) {
- array_offset = coords.size();
- coords.push_back(GetRegister(array_register));
- }
+
+ const Node array = is_array ? GetRegister(array_register) : nullptr;
+
+ Node dc{};
if (depth_compare) {
- // Depth is always stored in the register signaled by gpr20
- // or in the next register if lod or bias are used
+ // Depth is always stored in the register signaled by gpr20 or in the next register if lod
+ // or bias are used
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
- coords.push_back(GetRegister(depth_register));
- }
- // Fill ignored coordinates
- while (coords.size() < total_coord_count) {
- coords.push_back(Immediate(0));
+ dc = GetRegister(depth_register);
}
- return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
- 0, std::move(coords));
+ return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
}
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@@ -641,6 +628,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
(is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
? static_cast<u64>(instr.gpr20.Value())
: coord_register + 1;
+ const u32 bias_offset = coord_count > 2 ? 1 : 0;
std::vector<Node> coords;
for (std::size_t i = 0; i < coord_count; ++i) {
@@ -648,24 +636,17 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
}
- std::size_t array_offset{};
- if (is_array) {
- array_offset = coords.size();
- coords.push_back(GetRegister(array_register));
- }
+ const Node array = is_array ? GetRegister(array_register) : nullptr;
+
+ Node dc{};
if (depth_compare) {
- // Depth is always stored in the register signaled by gpr20
- // or in the next register if lod or bias are used
+ // Depth is always stored in the register signaled by gpr20 or in the next register if lod
+ // or bias are used
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
- coords.push_back(GetRegister(depth_register));
- }
- // Fill ignored coordinates
- while (coords.size() < total_coord_count) {
- coords.push_back(Immediate(0));
+ dc = GetRegister(depth_register);
}
- return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
- (coord_count > 2 ? 1 : 0), std::move(coords));
+ return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
@@ -680,24 +661,16 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
const u64 coord_register = array_register + (is_array ? 1 : 0);
std::vector<Node> coords;
-
- for (size_t i = 0; i < coord_count; ++i) {
+ for (size_t i = 0; i < coord_count; ++i)
coords.push_back(GetRegister(coord_register + i));
- }
- std::optional<u32> array_offset;
- if (is_array) {
- array_offset = static_cast<u32>(coords.size());
- coords.push_back(GetRegister(array_register));
- }
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
- auto params = coords;
- MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset};
- values[element] =
- Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
+ auto coords_copy = coords;
+ MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element};
+ values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
return values;
@@ -705,7 +678,6 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
const std::size_t type_coord_count = GetCoordCount(texture_type);
- const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
// If enabled arrays index is always stored in the gpr8 field
@@ -719,33 +691,22 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
: coord_register + 1;
std::vector<Node> coords;
-
for (std::size_t i = 0; i < type_coord_count; ++i) {
const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
}
- std::optional<u32> array_offset;
- if (is_array) {
- array_offset = static_cast<u32>(coords.size());
- coords.push_back(GetRegister(array_register));
- }
- const auto coords_count = static_cast<u32>(coords.size());
- if (lod_enabled) {
- // When lod is used always is in grp20
- coords.push_back(GetRegister(instr.gpr20));
- } else {
- coords.push_back(Immediate(0));
- }
+ const Node array = is_array ? GetRegister(array_register) : nullptr;
+ // When lod is used, it is always in gpr20
+ const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
- auto params = coords;
- MetaTexture meta{sampler, element, coords_count, array_offset};
- values[element] =
- Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params));
+ auto coords_copy = coords;
+ MetaTexture meta{sampler, array, {}, {lod}, element};
+ values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;
}
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 1d4fbef53..52c7f2c4e 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -156,12 +156,12 @@ enum class OperationCode {
Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- F4Texture, /// (MetaTexture, float[N] coords, float[M] params) -> float4
- F4TextureLod, /// (MetaTexture, float[N] coords, float[M] params) -> float4
- F4TextureGather, /// (MetaTexture, float[N] coords, float[M] params) -> float4
- F4TextureQueryDimensions, /// (MetaTexture, float a) -> float4
- F4TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
- F4TexelFetch, /// (MetaTexture, int[N], int) -> float4
+ Texture, /// (MetaTexture, float[N] coords) -> float4
+ TextureLod, /// (MetaTexture, float[N] coords) -> float4
+ TextureGather, /// (MetaTexture, float[N] coords) -> float4
+ TextureQueryDimensions, /// (MetaTexture, float a) -> float4
+ TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
+ TexelFetch, /// (MetaTexture, int[N], int) -> float4
Branch, /// (uint branch_target) -> void
PushFlowStack, /// (uint branch_target) -> void
@@ -288,9 +288,10 @@ struct MetaHalfArithmetic {
struct MetaTexture {
const Sampler& sampler;
+ Node array{};
+ Node depth_compare{};
+ std::vector<Node> extras;
u32 element{};
- u32 coords_count{};
- std::optional<u32> array_index;
};
constexpr MetaArithmetic PRECISE = {true};
@@ -754,9 +755,8 @@ private:
bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
- bool is_array, std::size_t array_offset, std::size_t bias_offset,
- std::vector<Node>&& coords);
+ Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
+ Node array, Node depth_compare, u32 bias_offset);
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
u64 byte_height);
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 2f6612a35..044ba116a 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -426,6 +426,8 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat
switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
return PixelFormat::ABGR8U;
+ case Tegra::FramebufferConfig::PixelFormat::BGRA8:
+ return PixelFormat::BGRA8;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();