diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/CMakeLists.txt | 14 | ||||
-rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 34 | ||||
-rw-r--r-- | src/video_core/engines/kepler_compute.h (renamed from src/video_core/engines/maxwell_compute.h) | 31 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 2 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_compute.cpp | 28 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 2 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 11 | ||||
-rw-r--r-- | src/video_core/gpu.h | 7 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 136 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 6 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/declarations.h | 45 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_device.cpp | 231 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_device.h | 116 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic_integer.cpp | 4 | ||||
-rw-r--r-- | src/video_core/shader/decode/conversion.cpp | 6 | ||||
-rw-r--r-- | src/video_core/shader/decode/memory.cpp | 159 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.h | 22 | ||||
-rw-r--r-- | src/video_core/surface.cpp | 2 |
18 files changed, 626 insertions, 230 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 33e507e69..d35a738d5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -5,12 +5,12 @@ add_library(video_core STATIC debug_utils/debug_utils.h engines/fermi_2d.cpp engines/fermi_2d.h + engines/kepler_compute.cpp + engines/kepler_compute.h engines/kepler_memory.cpp engines/kepler_memory.h engines/maxwell_3d.cpp engines/maxwell_3d.h - engines/maxwell_compute.cpp - engines/maxwell_compute.h engines/maxwell_dma.cpp engines/maxwell_dma.h engines/shader_bytecode.h @@ -101,6 +101,16 @@ add_library(video_core STATIC video_core.h ) +if (ENABLE_VULKAN) + target_sources(video_core PRIVATE + renderer_vulkan/declarations.h + renderer_vulkan/vk_device.cpp + renderer_vulkan/vk_device.h) + + target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include) + target_compile_definitions(video_core PRIVATE HAS_VULKAN) +endif() + create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp new file mode 100644 index 000000000..4ca856b6b --- /dev/null +++ b/src/video_core/engines/kepler_compute.cpp @@ -0,0 +1,34 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/logging/log.h" +#include "core/core.h" +#include "core/memory.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/memory_manager.h" + +namespace Tegra::Engines { + +KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {} + +KeplerCompute::~KeplerCompute() = default; + +void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { + ASSERT_MSG(method_call.method < Regs::NUM_REGS, + "Invalid KeplerCompute register, increase the size of the Regs structure"); + + regs.reg_array[method_call.method] = method_call.argument; + + switch (method_call.method) { + case KEPLER_COMPUTE_REG_INDEX(launch): + // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA + // kernels) + UNREACHABLE_MSG("Compute shaders are not implemented"); + break; + default: + break; + } +} + +} // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_compute.h b/src/video_core/engines/kepler_compute.h index 1d71f11bd..df0a32e0f 100644 --- a/src/video_core/engines/maxwell_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -10,47 +10,48 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" +#include "video_core/memory_manager.h" namespace Tegra::Engines { -#define MAXWELL_COMPUTE_REG_INDEX(field_name) \ - (offsetof(Tegra::Engines::MaxwellCompute::Regs, field_name) / sizeof(u32)) +#define KEPLER_COMPUTE_REG_INDEX(field_name) \ + (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class MaxwellCompute final { +class KeplerCompute final { public: - MaxwellCompute() = default; - ~MaxwellCompute() = default; + explicit KeplerCompute(MemoryManager& memory_manager); + ~KeplerCompute(); + + static constexpr std::size_t NumConstBuffers = 8; struct Regs { static constexpr std::size_t NUM_REGS = 0xCF8; union { struct { - INSERT_PADDING_WORDS(0x281); + INSERT_PADDING_WORDS(0xAF); - union { - u32 compute_end; - BitField<0, 1, u32> unknown; - } compute; + u32 launch; - INSERT_PADDING_WORDS(0xA76); + INSERT_PADDING_WORDS(0xC48); }; std::array<u32, NUM_REGS> reg_array; }; } regs{}; - static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), - "MaxwellCompute Regs has wrong size"); + "KeplerCompute Regs has wrong size"); + + MemoryManager& memory_manager; /// Write the value to the register identified by method. void CallMethod(const GPU::MethodCall& method_call); }; #define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(MaxwellCompute::Regs, field_name) == position * 4, \ + static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(compute, 0x281); +ASSERT_REG_POSITION(launch, 0xAF); #undef ASSERT_REG_POSITION diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 10eae6a65..19b6b14b2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -317,7 +317,7 @@ void Maxwell3D::ProcessQueryGet() { LongQueryResult query_result{}; query_result.value = result; // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming - query_result.timestamp = CoreTiming::GetTicks(); + query_result.timestamp = Core::Timing::GetTicks(); Memory::WriteBlock(*address, &query_result, sizeof(query_result)); } dirty_flags.OnMemoryWrite(); diff --git a/src/video_core/engines/maxwell_compute.cpp b/src/video_core/engines/maxwell_compute.cpp deleted file mode 100644 index 656db6a61..000000000 --- a/src/video_core/engines/maxwell_compute.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/logging/log.h" -#include "core/core.h" -#include "video_core/engines/maxwell_compute.h" - -namespace Tegra::Engines { - -void MaxwellCompute::CallMethod(const GPU::MethodCall& method_call) { - ASSERT_MSG(method_call.method < Regs::NUM_REGS, - "Invalid MaxwellCompute register, increase the size of the Regs structure"); - - regs.reg_array[method_call.method] = method_call.argument; - - switch (method_call.method) { - case MAXWELL_COMPUTE_REG_INDEX(compute): { - LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented"); - UNREACHABLE(); - break; - } - default: - break; - } -} - -} // namespace Tegra::Engines diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 269df9437..1f425f90b 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -186,7 +186,7 @@ enum class SubOp : u64 { }; enum class F2iRoundingOp : u64 { - None = 0, + RoundEven = 0, Floor = 1, Ceil = 2, Trunc = 3, diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index d3d32a359..3d00c308b 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,9 +6,9 @@ #include "core/core_timing.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" +#include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_memory.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/maxwell_compute.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/gpu.h" #include "video_core/rasterizer_interface.h" @@ -18,6 +18,7 @@ namespace Tegra { u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { switch (format) { case PixelFormat::ABGR8: + case PixelFormat::BGRA8: return 4; default: return 4; @@ -31,7 +32,7 @@ GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); - maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); + kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager); kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager); } @@ -245,8 +246,8 @@ void GPU::CallEngineMethod(const MethodCall& method_call) { case EngineID::MAXWELL_B: maxwell_3d->CallMethod(method_call); break; - case EngineID::MAXWELL_COMPUTE_B: - maxwell_compute->CallMethod(method_call); + case EngineID::KEPLER_COMPUTE_B: + kepler_compute->CallMethod(method_call); break; case EngineID::MAXWELL_DMA_COPY_A: maxwell_dma->CallMethod(method_call); @@ -282,7 +283,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { block.sequence = regs.semaphore_sequence; // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of // CoreTiming - block.timestamp = CoreTiming::GetTicks(); + block.timestamp = Core::Timing::GetTicks(); Memory::WriteBlock(*address, &block, sizeof(block)); } else { const auto address = diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index fb8975811..a482196ea 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -80,6 +80,7 @@ class DebugContext; struct FramebufferConfig { enum class PixelFormat : u32 { ABGR8 = 1, + BGRA8 = 5, }; /** @@ -102,15 +103,15 @@ struct FramebufferConfig { namespace Engines { class Fermi2D; class Maxwell3D; -class MaxwellCompute; class MaxwellDMA; +class KeplerCompute; class KeplerMemory; } // namespace Engines enum class EngineID { FERMI_TWOD_A = 0x902D, // 2D Engine MAXWELL_B = 0xB197, // 3D Engine - MAXWELL_COMPUTE_B = 0xB1C0, + KEPLER_COMPUTE_B = 0xB1C0, KEPLER_INLINE_TO_MEMORY_B = 0xA140, MAXWELL_DMA_COPY_A = 0xB0B5, }; @@ -208,7 +209,7 @@ private: /// 2D engine std::unique_ptr<Engines::Fermi2D> fermi_2d; /// Compute engine - std::unique_ptr<Engines::MaxwellCompute> maxwell_compute; + std::unique_ptr<Engines::KeplerCompute> kepler_compute; /// DMA engine std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; /// Inline memory engine diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 70e124dc4..db18f4dbe 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -616,17 +616,8 @@ private: std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { std::string value = VisitOperand(operation, operand_index); - switch (type) { - case Type::Bool: - case Type::Bool2: - case Type::Float: - return value; - case Type::Int: - return "ftoi(" + value + ')'; - case Type::Uint: - return "ftou(" + value + ')'; - case Type::HalfFloat: + case Type::HalfFloat: { const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta()); if (!half_meta) { value = "toHalf2(" + value + ')'; @@ -643,6 +634,26 @@ private: return "vec2(toHalf2(" + value + ")[1])"; } } + default: + return CastOperand(value, type); + } + } + + std::string CastOperand(const std::string& value, Type type) const { + switch (type) { + case Type::Bool: + case Type::Bool2: + case Type::Float: + return value; + case Type::Int: + return "ftoi(" + value + ')'; + case Type::Uint: + return "ftou(" + value + ')'; + case Type::HalfFloat: + // Can't be handled as a stand-alone value + UNREACHABLE(); + return value; + } UNREACHABLE(); return value; } @@ -650,6 +661,7 @@ private: std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) { switch (type) { case Type::Bool: + case Type::Bool2: case Type::Float: if (needs_parenthesis) { return '(' + value + ')'; @@ -719,45 +731,51 @@ private: constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - const auto count = static_cast<u32>(operation.GetOperandsCount()); ASSERT(meta); + const std::size_t count = operation.GetOperandsCount(); + const bool has_array = meta->sampler.IsArray(); + const bool has_shadow = meta->sampler.IsShadow(); + std::string expr = func; expr += '('; expr += GetSampler(meta->sampler); expr += ", "; - expr += coord_constructors[meta->coords_count - 1]; + expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); expr += '('; - for (u32 i = 0; i < count; ++i) { - const bool is_extra = i >= meta->coords_count; - const bool is_array = i == meta->array_index; - - std::string operand = [&]() { - if (is_extra && is_extra_int) { - if (const auto immediate = std::get_if<ImmediateNode>(operation[i])) { - return std::to_string(static_cast<s32>(immediate->GetValue())); - } else { - return "ftoi(" + Visit(operation[i]) + ')'; - } - } else { - return Visit(operation[i]); - } - }(); - if (is_array) { - ASSERT(!is_extra); - operand = "float(ftoi(" + operand + "))"; - } + for (std::size_t i = 0; i < count; ++i) { + expr += Visit(operation[i]); - expr += operand; - - if (i + 1 == meta->coords_count) { - expr += ')'; - } - if (i + 1 < count) { + const std::size_t next = i + 1; + if (next < count || has_array || has_shadow) + expr += ", "; + } + if (has_array) { + expr += "float(ftoi(" + Visit(meta->array) + "))"; + } + if (has_shadow) { + if (has_array) expr += ", "; + expr += Visit(meta->depth_compare); + } + expr += ')'; + + for (const Node extra : meta->extras) { + expr += ", "; + if (is_extra_int) { + if (const auto immediate = std::get_if<ImmediateNode>(extra)) { + // Inline the string as an immediate integer in GLSL (some extra arguments are + // required to be constant) + expr += std::to_string(static_cast<s32>(immediate->GetValue())); + } else { + expr += "ftoi(" + Visit(extra) + ')'; + } + } else { + expr += Visit(extra); } } + expr += ')'; return expr; } @@ -1134,7 +1152,7 @@ private: Type::HalfFloat); } - std::string F4Texture(Operation operation) { + std::string Texture(Operation operation) { const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); @@ -1145,7 +1163,7 @@ private: return expr + GetSwizzle(meta->element); } - std::string F4TextureLod(Operation operation) { + std::string TextureLod(Operation operation) { const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); @@ -1156,7 +1174,7 @@ private: return expr + GetSwizzle(meta->element); } - std::string F4TextureGather(Operation operation) { + std::string TextureGather(Operation operation) { const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); @@ -1164,7 +1182,7 @@ private: GetSwizzle(meta->element); } - std::string F4TextureQueryDimensions(Operation operation) { + std::string TextureQueryDimensions(Operation operation) { const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); @@ -1184,7 +1202,7 @@ private: return "0"; } - std::string F4TextureQueryLod(Operation operation) { + std::string TextureQueryLod(Operation operation) { const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); @@ -1195,29 +1213,33 @@ private: return "0"; } - std::string F4TexelFetch(Operation operation) { + std::string TexelFetch(Operation operation) { constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"}; const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - const auto count = static_cast<u32>(operation.GetOperandsCount()); ASSERT(meta); + UNIMPLEMENTED_IF(meta->sampler.IsArray()); + const std::size_t count = operation.GetOperandsCount(); std::string expr = "texelFetch("; expr += GetSampler(meta->sampler); expr += ", "; - expr += constructors[meta->coords_count - 1]; + expr += constructors.at(operation.GetOperandsCount() - 1); expr += '('; - for (u32 i = 0; i < count; ++i) { + for (std::size_t i = 0; i < count; ++i) { expr += VisitOperand(operation, i, Type::Int); - - if (i + 1 == meta->coords_count) { + const std::size_t next = i + 1; + if (next == count) expr += ')'; - } - if (i + 1 < count) { + else if (next < count) expr += ", "; - } + } + for (std::size_t i = 0; i < meta->extras.size(); ++i) { + expr += ", "; + expr += CastOperand(Visit(meta->extras.at(i)), Type::Int); } expr += ')'; + return expr + GetSwizzle(meta->element); } @@ -1454,12 +1476,12 @@ private: &GLSLDecompiler::Logical2HNotEqual, &GLSLDecompiler::Logical2HGreaterEqual, - &GLSLDecompiler::F4Texture, - &GLSLDecompiler::F4TextureLod, - &GLSLDecompiler::F4TextureGather, - &GLSLDecompiler::F4TextureQueryDimensions, - &GLSLDecompiler::F4TextureQueryLod, - &GLSLDecompiler::F4TexelFetch, + &GLSLDecompiler::Texture, + &GLSLDecompiler::TextureLod, + &GLSLDecompiler::TextureGather, + &GLSLDecompiler::TextureQueryDimensions, + &GLSLDecompiler::TextureQueryLod, + &GLSLDecompiler::TexelFetch, &GLSLDecompiler::Branch, &GLSLDecompiler::PushFlowStack, diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6476a9e1a..cca2ed708 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -107,7 +107,7 @@ RendererOpenGL::~RendererOpenGL() = default; void RendererOpenGL::SwapBuffers( std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { - Core::System::GetInstance().GetPerfStats().EndSystemFrame(); + system.GetPerfStats().EndSystemFrame(); // Maintain the rasterizer's state as a priority OpenGLState prev_state = OpenGLState::GetCurState(); @@ -137,8 +137,8 @@ void RendererOpenGL::SwapBuffers( render_window.PollEvents(); - Core::System::GetInstance().FrameLimiter().DoFrameLimiting(CoreTiming::GetGlobalTimeUs()); - Core::System::GetInstance().GetPerfStats().BeginSystemFrame(); + system.FrameLimiter().DoFrameLimiting(Core::Timing::GetGlobalTimeUs()); + system.GetPerfStats().BeginSystemFrame(); // Restore the rasterizer state prev_state.Apply(); diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h new file mode 100644 index 000000000..ba25b5bc7 --- /dev/null +++ b/src/video_core/renderer_vulkan/declarations.h @@ -0,0 +1,45 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vulkan/vulkan.hpp> + +namespace Vulkan { + +// vulkan.hpp unique handlers use DispatchLoaderStatic +template <typename T> +using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>; + +using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>; +using UniqueBuffer = UniqueHandle<vk::Buffer>; +using UniqueBufferView = UniqueHandle<vk::BufferView>; +using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>; +using UniqueCommandPool = UniqueHandle<vk::CommandPool>; +using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>; +using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>; +using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>; +using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>; +using UniqueDevice = UniqueHandle<vk::Device>; +using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>; +using UniqueEvent = UniqueHandle<vk::Event>; +using UniqueFence = UniqueHandle<vk::Fence>; +using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>; +using UniqueImage = UniqueHandle<vk::Image>; +using UniqueImageView = UniqueHandle<vk::ImageView>; +using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>; +using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>; +using UniquePipeline = UniqueHandle<vk::Pipeline>; +using UniquePipelineCache = UniqueHandle<vk::PipelineCache>; +using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>; +using UniqueQueryPool = UniqueHandle<vk::QueryPool>; +using UniqueRenderPass = UniqueHandle<vk::RenderPass>; +using UniqueSampler = UniqueHandle<vk::Sampler>; +using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>; +using UniqueSemaphore = UniqueHandle<vk::Semaphore>; +using UniqueShaderModule = UniqueHandle<vk::ShaderModule>; +using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>; +using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp new file mode 100644 index 000000000..78a4e5f0e --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -0,0 +1,231 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <map> +#include <optional> +#include <set> +#include <vector> +#include "common/assert.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" + +namespace Vulkan { + +namespace Alternatives { + +constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = { + vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; +constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = { + vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; + +} // namespace Alternatives + +constexpr const vk::Format* GetFormatAlternatives(vk::Format format) { + switch (format) { + case vk::Format::eD24UnormS8Uint: + return Alternatives::Depth24UnormS8Uint.data(); + case vk::Format::eD16UnormS8Uint: + return Alternatives::Depth16UnormS8Uint.data(); + default: + return nullptr; + } +} + +constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, + FormatType format_type) { + switch (format_type) { + case FormatType::Linear: + return properties.linearTilingFeatures; + case FormatType::Optimal: + return properties.optimalTilingFeatures; + case FormatType::Buffer: + return properties.bufferFeatures; + default: + return {}; + } +} + +VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, + vk::SurfaceKHR surface) + : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} { + SetupFamilies(dldi, surface); + SetupProperties(dldi); +} + +VKDevice::~VKDevice() = default; + +bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { + const auto queue_cis = GetDeviceQueueCreateInfos(); + vk::PhysicalDeviceFeatures device_features{}; + + const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME}; + const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), + 0, nullptr, static_cast<u32>(extensions.size()), + extensions.data(), &device_features); + vk::Device dummy_logical; + if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) { + LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!"); + return false; + } + + dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr); + logical = UniqueDevice( + dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld)); + + graphics_queue = logical->getQueue(graphics_family, 0, dld); + present_queue = logical->getQueue(present_family, 0, dld); + return true; +} + +vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, + vk::FormatFeatureFlags wanted_usage, + FormatType format_type) const { + if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { + return wanted_format; + } + // The wanted format is not supported by hardware, search for alternatives + const vk::Format* alternatives = GetFormatAlternatives(wanted_format); + if (alternatives == nullptr) { + LOG_CRITICAL(Render_Vulkan, + "Format={} with usage={} and type={} has no defined alternatives and host " + "hardware does not support it", + static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), + static_cast<u32>(format_type)); + UNREACHABLE(); + return wanted_format; + } + + std::size_t i = 0; + for (vk::Format alternative = alternatives[0]; alternative != vk::Format{}; + alternative = alternatives[++i]) { + if (!IsFormatSupported(alternative, wanted_usage, format_type)) + continue; + LOG_WARNING(Render_Vulkan, + "Emulating format={} with alternative format={} with usage={} and type={}", + static_cast<u32>(wanted_format), static_cast<u32>(alternative), + static_cast<u32>(wanted_usage), static_cast<u32>(format_type)); + return alternative; + } + + // No alternatives found, panic + LOG_CRITICAL(Render_Vulkan, + "Format={} with usage={} and type={} is not supported by the host hardware and " + "doesn't support any of the alternatives", + static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), + static_cast<u32>(format_type)); + UNREACHABLE(); + return wanted_format; +} + +bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, + FormatType format_type) const { + const auto it = format_properties.find(wanted_format); + if (it == format_properties.end()) { + LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", + static_cast<u32>(wanted_format)); + UNREACHABLE(); + return true; + } + const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type); + return (supported_usage & wanted_usage) == wanted_usage; +} + +bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, + vk::SurfaceKHR surface) { + const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME; + + bool has_swapchain{}; + for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { + has_swapchain |= prop.extensionName == swapchain_extension; + } + if (!has_swapchain) { + // The device doesn't support creating swapchains. + return false; + } + + bool has_graphics{}, has_present{}; + const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); + for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { + const auto& family = queue_family_properties[i]; + if (family.queueCount == 0) + continue; + + has_graphics |= + (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0); + has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0; + } + if (!has_graphics || !has_present) { + // The device doesn't have a graphics and present queue. + return false; + } + + // TODO(Rodrigo): Check if the device matches all requeriments. + const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); + if (props.limits.maxUniformBufferRange < 65536) { + return false; + } + + // Device is suitable. + return true; +} + +void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { + std::optional<u32> graphics_family_, present_family_; + + const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); + for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { + if (graphics_family_ && present_family_) + break; + + const auto& queue_family = queue_family_properties[i]; + if (queue_family.queueCount == 0) + continue; + + if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) + graphics_family_ = i; + if (physical.getSurfaceSupportKHR(i, surface, dldi)) + present_family_ = i; + } + ASSERT(graphics_family_ && present_family_); + + graphics_family = *graphics_family_; + present_family = *present_family_; +} + +void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) { + const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); + device_type = props.deviceType; + uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); +} + +std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { + static const float QUEUE_PRIORITY = 1.f; + + std::set<u32> unique_queue_families = {graphics_family, present_family}; + std::vector<vk::DeviceQueueCreateInfo> queue_cis; + + for (u32 queue_family : unique_queue_families) + queue_cis.push_back({{}, queue_family, 1, &QUEUE_PRIORITY}); + + return queue_cis; +} + +std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( + const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { + std::map<vk::Format, vk::FormatProperties> format_properties; + + const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) { + format_properties.emplace(format, physical.getFormatProperties(format, dldi)); + }; + AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); + AddFormatQuery(vk::Format::eR5G6B5UnormPack16); + AddFormatQuery(vk::Format::eD32Sfloat); + AddFormatQuery(vk::Format::eD16UnormS8Uint); + AddFormatQuery(vk::Format::eD24UnormS8Uint); + AddFormatQuery(vk::Format::eD32SfloatS8Uint); + + return format_properties; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h new file mode 100644 index 000000000..e87c7a508 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -0,0 +1,116 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <map> +#include <vector> +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" + +namespace Vulkan { + +/// Format usage descriptor +enum class FormatType { Linear, Optimal, Buffer }; + +/// Handles data specific to a physical device. +class VKDevice final { +public: + explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, + vk::SurfaceKHR surface); + ~VKDevice(); + + /// Initializes the device. Returns true on success. + bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance); + + /** + * Returns a format supported by the device for the passed requeriments. + * @param wanted_format The ideal format to be returned. It may not be the returned format. + * @param wanted_usage The usage that must be fulfilled even if the format is not supported. + * @param format_type Format type usage. + * @returns A format supported by the device. + */ + vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, + FormatType format_type) const; + + /// Returns the dispatch loader with direct function pointers of the device + const vk::DispatchLoaderDynamic& GetDispatchLoader() const { + return dld; + } + + /// Returns the logical device + vk::Device GetLogical() const { + return logical.get(); + } + + /// Returns the physical device. + vk::PhysicalDevice GetPhysical() const { + return physical; + } + + /// Returns the main graphics queue. + vk::Queue GetGraphicsQueue() const { + return graphics_queue; + } + + /// Returns the main present queue. + vk::Queue GetPresentQueue() const { + return present_queue; + } + + /// Returns main graphics queue family index. + u32 GetGraphicsFamily() const { + return graphics_family; + } + + /// Returns main present queue family index. + u32 GetPresentFamily() const { + return present_family; + } + + /// Returns if the device is integrated with the host CPU + bool IsIntegrated() const { + return device_type == vk::PhysicalDeviceType::eIntegratedGpu; + } + + /// Returns uniform buffer alignment requeriment + u64 GetUniformBufferAlignment() const { + return uniform_buffer_alignment; + } + + /// Checks if the physical device is suitable. + static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, + vk::SurfaceKHR surface); + +private: + /// Sets up queue families. + void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); + + /// Sets up device properties. + void SetupProperties(const vk::DispatchLoaderDynamic& dldi); + + /// Returns a list of queue initialization descriptors. + std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; + + /// Returns true if a format is supported. + bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, + FormatType format_type) const; + + /// Returns the device properties for Vulkan formats. + static std::map<vk::Format, vk::FormatProperties> GetFormatProperties( + const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); + + const vk::PhysicalDevice physical; ///< Physical device + vk::DispatchLoaderDynamic dld; ///< Device function pointers + UniqueDevice logical; ///< Logical device + vk::Queue graphics_queue; ///< Main graphics queue + vk::Queue present_queue; ///< Main present queue + u32 graphics_family{}; ///< Main graphics queue family index + u32 present_family{}; ///< Main present queue family index + vk::PhysicalDeviceType device_type; ///< Physical device type + u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment + std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary +}; + +} // namespace Vulkan diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 38bb692d6..9fd4b273e 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -41,7 +41,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); - SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } @@ -284,4 +284,4 @@ void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Nod SetRegister(bb, dest, value); } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index a992f73f8..55a6fbbf2 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -118,8 +118,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { value = [&]() { switch (instr.conversion.f2i.rounding) { - case Tegra::Shader::F2iRoundingOp::None: - return value; + case Tegra::Shader::F2iRoundingOp::RoundEven: + return Operation(OperationCode::FRoundEven, PRECISE, value); case Tegra::Shader::F2iRoundingOp::Floor: return Operation(OperationCode::FFloor, PRECISE, value); case Tegra::Shader::F2iRoundingOp::Ceil: @@ -146,4 +146,4 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index e006f8138..55ec601ff 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -306,7 +306,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { case OpCode::Id::TLD4S: { UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); - if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); } @@ -315,9 +314,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Node op_a = GetRegister(instr.gpr8); const Node op_b = GetRegister(instr.gpr20); - std::vector<Node> coords; - // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. + std::vector<Node> coords; if (depth_compare) { // Note: TLD4S coordinate encoding works just like TEXS's const Node op_y = GetRegister(instr.gpr8.Value() + 1); @@ -328,18 +326,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { coords.push_back(op_a); coords.push_back(op_b); } - const auto num_coords = static_cast<u32>(coords.size()); - coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); + std::vector<Node> extras; + extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); const auto& sampler = GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); Node4 values; for (u32 element = 0; element < values.size(); ++element) { - auto params = coords; - MetaTexture meta{sampler, element, num_coords}; - values[element] = - Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); + auto coords_copy = coords; + MetaTexture meta{sampler, {}, {}, extras, element}; + values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } WriteTexsInstructionFloat(bb, instr, values); @@ -360,12 +357,13 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { switch (instr.txq.query_type) { case Tegra::Shader::TextureQueryType::Dimension: { for (u32 element = 0; element < 4; ++element) { - if (instr.txq.IsComponentEnabled(element)) { - MetaTexture meta{sampler, element}; - const Node value = Operation(OperationCode::F4TextureQueryDimensions, - std::move(meta), GetRegister(instr.gpr8)); - SetTemporal(bb, indexer++, value); + if (!instr.txq.IsComponentEnabled(element)) { + continue; } + MetaTexture meta{sampler, {}, {}, {}, element}; + const Node value = + Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); + SetTemporal(bb, indexer++, value); } for (u32 i = 0; i < indexer; ++i) { SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); @@ -412,9 +410,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { for (u32 element = 0; element < 2; ++element) { auto params = coords; - MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())}; - const Node value = - Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params)); + MetaTexture meta{sampler, {}, {}, {}, element}; + const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); SetTemporal(bb, element, value); } for (u32 element = 0; element < 2; ++element) { @@ -432,7 +429,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); + LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); } WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); @@ -535,15 +532,16 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, } Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array, - std::size_t array_offset, std::size_t bias_offset, - std::vector<Node>&& coords) { - UNIMPLEMENTED_IF_MSG( - (texture_type == TextureType::Texture3D && (is_array || depth_compare)) || - (texture_type == TextureType::TextureCube && is_array && depth_compare), - "This method is not supported."); + TextureProcessMode process_mode, std::vector<Node> coords, + Node array, Node depth_compare, u32 bias_offset) { + const bool is_array = array; + const bool is_shadow = depth_compare; - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); + UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || + (texture_type == TextureType::TextureCube && is_array && is_shadow), + "This method is not supported."); + + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || @@ -552,35 +550,30 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, // LOD selection (either via bias or explicit textureLod) not supported in GL for // sampler2DArrayShadow and samplerCubeArrayShadow. const bool gl_lod_supported = - !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) || - (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare)); + !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || + (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); const OperationCode read_method = - lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture; + lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture; UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); - std::optional<u32> array_offset_value; - if (is_array) - array_offset_value = static_cast<u32>(array_offset); - - const auto coords_count = static_cast<u32>(coords.size()); - + std::vector<Node> extras; if (process_mode != TextureProcessMode::None && gl_lod_supported) { if (process_mode == TextureProcessMode::LZ) { - coords.push_back(Immediate(0.0f)); + extras.push_back(Immediate(0.0f)); } else { // If present, lod or bias are always stored in the register indexed by the gpr20 // field with an offset depending on the usage of the other registers - coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); + extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); } } Node4 values; for (u32 element = 0; element < values.size(); ++element) { - auto params = coords; - MetaTexture meta{sampler, element, coords_count, array_offset_value}; - values[element] = Operation(read_method, std::move(meta), std::move(params)); + auto copy_coords = coords; + MetaTexture meta{sampler, array, depth_compare, extras, element}; + values[element] = Operation(read_method, meta, std::move(copy_coords)); } return values; @@ -602,28 +595,22 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, for (std::size_t i = 0; i < coord_count; ++i) { coords.push_back(GetRegister(coord_register + i)); } - // 1D.DC in opengl the 2nd component is ignored. + // 1D.DC in OpenGL the 2nd component is ignored. if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { coords.push_back(Immediate(0.0f)); } - std::size_t array_offset{}; - if (is_array) { - array_offset = coords.size(); - coords.push_back(GetRegister(array_register)); - } + + const Node array = is_array ? GetRegister(array_register) : nullptr; + + Node dc{}; if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 - // or in the next register if lod or bias are used + // Depth is always stored in the register signaled by gpr20 or in the next register if lod + // or bias are used const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); - coords.push_back(GetRegister(depth_register)); - } - // Fill ignored coordinates - while (coords.size() < total_coord_count) { - coords.push_back(Immediate(0)); + dc = GetRegister(depth_register); } - return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset, - 0, std::move(coords)); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); } Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, @@ -641,6 +628,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) ? static_cast<u64>(instr.gpr20.Value()) : coord_register + 1; + const u32 bias_offset = coord_count > 2 ? 1 : 0; std::vector<Node> coords; for (std::size_t i = 0; i < coord_count; ++i) { @@ -648,24 +636,17 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); } - std::size_t array_offset{}; - if (is_array) { - array_offset = coords.size(); - coords.push_back(GetRegister(array_register)); - } + const Node array = is_array ? GetRegister(array_register) : nullptr; + + Node dc{}; if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 - // or in the next register if lod or bias are used + // Depth is always stored in the register signaled by gpr20 or in the next register if lod + // or bias are used const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); - coords.push_back(GetRegister(depth_register)); - } - // Fill ignored coordinates - while (coords.size() < total_coord_count) { - coords.push_back(Immediate(0)); + dc = GetRegister(depth_register); } - return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset, - (coord_count > 2 ? 1 : 0), std::move(coords)); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); } Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, @@ -680,24 +661,16 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de const u64 coord_register = array_register + (is_array ? 1 : 0); std::vector<Node> coords; - - for (size_t i = 0; i < coord_count; ++i) { + for (size_t i = 0; i < coord_count; ++i) coords.push_back(GetRegister(coord_register + i)); - } - std::optional<u32> array_offset; - if (is_array) { - array_offset = static_cast<u32>(coords.size()); - coords.push_back(GetRegister(array_register)); - } const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); Node4 values; for (u32 element = 0; element < values.size(); ++element) { - auto params = coords; - MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset}; - values[element] = - Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); + auto coords_copy = coords; + MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element}; + values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } return values; @@ -705,7 +678,6 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { const std::size_t type_coord_count = GetCoordCount(texture_type); - const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0); const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; // If enabled arrays index is always stored in the gpr8 field @@ -719,33 +691,22 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is : coord_register + 1; std::vector<Node> coords; - for (std::size_t i = 0; i < type_coord_count; ++i) { const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); } - std::optional<u32> array_offset; - if (is_array) { - array_offset = static_cast<u32>(coords.size()); - coords.push_back(GetRegister(array_register)); - } - const auto coords_count = static_cast<u32>(coords.size()); - if (lod_enabled) { - // When lod is used always is in grp20 - coords.push_back(GetRegister(instr.gpr20)); - } else { - coords.push_back(Immediate(0)); - } + const Node array = is_array ? GetRegister(array_register) : nullptr; + // When lod is used always is in gpr20 + const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); Node4 values; for (u32 element = 0; element < values.size(); ++element) { - auto params = coords; - MetaTexture meta{sampler, element, coords_count, array_offset}; - values[element] = - Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); + auto coords_copy = coords; + MetaTexture meta{sampler, array, {}, {lod}, element}; + values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } return values; } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 1d4fbef53..52c7f2c4e 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -156,12 +156,12 @@ enum class OperationCode { Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - F4Texture, /// (MetaTexture, float[N] coords, float[M] params) -> float4 - F4TextureLod, /// (MetaTexture, float[N] coords, float[M] params) -> float4 - F4TextureGather, /// (MetaTexture, float[N] coords, float[M] params) -> float4 - F4TextureQueryDimensions, /// (MetaTexture, float a) -> float4 - F4TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 - F4TexelFetch, /// (MetaTexture, int[N], int) -> float4 + Texture, /// (MetaTexture, float[N] coords) -> float4 + TextureLod, /// (MetaTexture, float[N] coords) -> float4 + TextureGather, /// (MetaTexture, float[N] coords) -> float4 + TextureQueryDimensions, /// (MetaTexture, float a) -> float4 + TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 + TexelFetch, /// (MetaTexture, int[N], int) -> float4 Branch, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void @@ -288,9 +288,10 @@ struct MetaHalfArithmetic { struct MetaTexture { const Sampler& sampler; + Node array{}; + Node depth_compare{}; + std::vector<Node> extras; u32 element{}; - u32 coords_count{}; - std::optional<u32> array_index; }; constexpr MetaArithmetic PRECISE = {true}; @@ -754,9 +755,8 @@ private: bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array, std::size_t array_offset, std::size_t bias_offset, - std::vector<Node>&& coords); + Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, + Node array, Node depth_compare, u32 bias_offset); Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, u64 byte_height); diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 2f6612a35..044ba116a 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -426,6 +426,8 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat switch (format) { case Tegra::FramebufferConfig::PixelFormat::ABGR8: return PixelFormat::ABGR8U; + case Tegra::FramebufferConfig::PixelFormat::BGRA8: + return PixelFormat::BGRA8; default: LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); |