diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 3 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 3 | ||||
-rw-r--r-- | src/video_core/memory_manager.cpp | 18 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 12 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/fixed_pipeline_state.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 42 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 6 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 2 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/wrapper.cpp | 21 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic_half.cpp | 51 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic_integer.cpp | 4 |
11 files changed, 119 insertions, 45 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cfcda4f53..3dfba8197 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1259,7 +1259,8 @@ public: GPUVAddr LimitAddress() const { return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) | - limit_low); + limit_low) + + 1; } } vertex_array_limit[NumVertexArrays]; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7231597d4..cde3a26b9 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -655,6 +655,7 @@ union Instruction { } constexpr Instruction(u64 value) : value{value} {} + constexpr Instruction(const Instruction& instr) : value(instr.value) {} BitField<0, 8, Register> gpr0; BitField<8, 8, Register> gpr8; @@ -817,11 +818,9 @@ union Instruction { BitField<32, 1, u64> saturate; BitField<49, 2, HalfMerge> merge; - BitField<43, 1, u64> negate_a; BitField<44, 1, u64> abs_a; BitField<47, 2, HalfType> type_a; - BitField<31, 1, u64> negate_b; BitField<30, 1, u64> abs_b; BitField<28, 2, HalfType> type_b; diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index fd49bc2a9..dbee9f634 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -51,11 +51,8 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr); - ASSERT(system.CurrentProcess() - ->PageTable() - .SetMemoryAttribute(cpu_addr, size, Kernel::Memory::MemoryAttribute::DeviceShared, - Kernel::Memory::MemoryAttribute::DeviceShared) - .IsSuccess()); + ASSERT( + system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess()); return gpu_addr; } @@ -66,11 +63,8 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) const u64 aligned_size{Common::AlignUp(size, page_size)}; MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr); - ASSERT(system.CurrentProcess() - ->PageTable() - .SetMemoryAttribute(cpu_addr, size, Kernel::Memory::MemoryAttribute::DeviceShared, - Kernel::Memory::MemoryAttribute::DeviceShared) - .IsSuccess()); + ASSERT( + system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess()); return gpu_addr; } @@ -87,9 +81,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { UnmapRange(gpu_addr, aligned_size); ASSERT(system.CurrentProcess() ->PageTable() - .SetMemoryAttribute(cpu_addr.value(), size, - Kernel::Memory::MemoryAttribute::DeviceShared, - Kernel::Memory::MemoryAttribute::None) + .UnlockForDeviceAddressSpace(cpu_addr.value(), size) .IsSuccess()); return gpu_addr; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4c16c89d2..6fe155bcc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -186,8 +186,12 @@ void RasterizerOpenGL::SetupVertexBuffer() { const GPUVAddr start = vertex_array.StartAddress(); const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); - ASSERT(end > start); - const u64 size = end - start + 1; + ASSERT(end >= start); + const u64 size = end - start; + if (size == 0) { + glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride); + continue; + } const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset, vertex_array.stride); @@ -311,8 +315,8 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { const GPUVAddr start = regs.vertex_array[index].StartAddress(); const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); - ASSERT(end > start); - size += end - start + 1; + size += end - start; + ASSERT(end >= start); } return size; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 9fe6bdbf9..9a950f4de 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -129,7 +129,7 @@ struct alignas(32) FixedPipelineState { auto& binding = bindings[index]; binding.raw = 0; binding.enabled.Assign(enabled ? 1 : 0); - binding.stride.Assign(stride); + binding.stride.Assign(static_cast<u16>(stride)); binding_divisors[index] = divisor; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 8a1f57891..68464e637 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -877,8 +877,12 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex const GPUVAddr start{vertex_array.StartAddress()}; const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; - ASSERT(end > start); - const std::size_t size{end - start + 1}; + ASSERT(end >= start); + const std::size_t size{end - start}; + if (size == 0) { + buffer_bindings.AddVertexBinding(DefaultBuffer(), 0); + continue; + } const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); buffer_bindings.AddVertexBinding(buffer, offset); } @@ -1033,8 +1037,7 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) { if (!buffer.enabled) { // Set values to zero to unbind buffers - update_descriptor_queue.AddBuffer(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, - sizeof(float)); + update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); return; } @@ -1057,7 +1060,9 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd if (size == 0) { // Sometimes global memory pointers don't have a proper size. Upload a dummy entry // because Vulkan doesn't like empty buffers. - constexpr std::size_t dummy_size = 4; + // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the + // default buffer. + static constexpr std::size_t dummy_size = 4; const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); return; @@ -1222,7 +1227,7 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; DEBUG_ASSERT(end >= start); - size += (end - start + 1) * regs.vertex_array[index].enable; + size += (end - start) * regs.vertex_array[index].enable; } return size; } @@ -1269,4 +1274,29 @@ RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) return renderpass_params; } +VkBuffer RasterizerVulkan::DefaultBuffer() { + if (default_buffer) { + return *default_buffer; + } + + VkBufferCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.size = DEFAULT_BUFFER_SIZE; + ci.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ci.queueFamilyIndexCount = 0; + ci.pQueueFamilyIndices = nullptr; + default_buffer = device.GetLogical().CreateBuffer(ci); + default_buffer_commit = memory_manager.Commit(default_buffer, false); + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) { + cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0); + }); + return *default_buffer; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2fa46b0cc..d41a7929e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -155,6 +155,7 @@ private: using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>; static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8; + static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); void FlushWork(); @@ -247,6 +248,8 @@ private: RenderPassParams GetRenderPassParams(Texceptions texceptions) const; + VkBuffer DefaultBuffer(); + Core::System& system; Core::Frontend::EmuWindow& render_window; VKScreenInfo& screen_info; @@ -271,6 +274,9 @@ private: VKFenceManager fence_manager; VKQueryCache query_cache; + vk::Buffer default_buffer; + VKMemoryCommit default_buffer_commit; + std::array<View, Maxwell::NumRenderTargets> color_attachments; View zeta_attachment; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 94d954d7a..c76ab5c2d 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -81,7 +81,7 @@ VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_v ci.size = 1ULL << log2; ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ci.queueFamilyIndexCount = 0; ci.pQueueFamilyIndices = nullptr; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 539f3c974..7f5bc1404 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <exception> #include <memory> #include <optional> @@ -16,6 +17,23 @@ namespace Vulkan::vk { namespace { +void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld) { + std::stable_sort(devices.begin(), devices.end(), [&](auto lhs, auto rhs) { + // This will call Vulkan more than needed, but these calls are cheap. + const auto lhs_properties = vk::PhysicalDevice(lhs, dld).GetProperties(); + const auto rhs_properties = vk::PhysicalDevice(rhs, dld).GetProperties(); + + // Prefer discrete GPUs, Nvidia over AMD, AMD over Intel, Intel over the rest. + const bool preferred = + (lhs_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU && + rhs_properties.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) || + (lhs_properties.vendorID == 0x10DE && rhs_properties.vendorID != 0x10DE) || + (lhs_properties.vendorID == 0x1002 && rhs_properties.vendorID != 0x1002) || + (lhs_properties.vendorID == 0x8086 && rhs_properties.vendorID != 0x8086); + return !preferred; + }); +} + template <typename T> bool Proc(T& result, const InstanceDispatch& dld, const char* proc_name, VkInstance instance = nullptr) noexcept { @@ -389,7 +407,8 @@ std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices( if (dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()) != VK_SUCCESS) { return std::nullopt; } - return physical_devices; + SortPhysicalDevices(physical_devices, *dld); + return std::make_optional(std::move(physical_devices)); } DebugCallback Instance::TryCreateDebugCallback( diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index ee7d9a29d..a276aee44 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -19,22 +19,46 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - if (opcode->get().GetId() == OpCode::Id::HADD2_C || - opcode->get().GetId() == OpCode::Id::HADD2_R) { + bool negate_a = false; + bool negate_b = false; + bool absolute_a = false; + bool absolute_b = false; + + switch (opcode->get().GetId()) { + case OpCode::Id::HADD2_R: if (instr.alu_half.ftz == 0) { LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } + negate_a = ((instr.value >> 43) & 1) != 0; + negate_b = ((instr.value >> 31) & 1) != 0; + absolute_a = ((instr.value >> 44) & 1) != 0; + absolute_b = ((instr.value >> 30) & 1) != 0; + break; + case OpCode::Id::HADD2_C: + if (instr.alu_half.ftz == 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); + } + negate_a = ((instr.value >> 43) & 1) != 0; + negate_b = ((instr.value >> 56) & 1) != 0; + absolute_a = ((instr.value >> 44) & 1) != 0; + absolute_b = ((instr.value >> 54) & 1) != 0; + break; + case OpCode::Id::HMUL2_R: + negate_a = ((instr.value >> 43) & 1) != 0; + absolute_a = ((instr.value >> 44) & 1) != 0; + absolute_b = ((instr.value >> 30) & 1) != 0; + break; + case OpCode::Id::HMUL2_C: + negate_b = ((instr.value >> 31) & 1) != 0; + absolute_a = ((instr.value >> 44) & 1) != 0; + absolute_b = ((instr.value >> 54) & 1) != 0; + break; } - const bool negate_a = - opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; - const bool negate_b = - opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); + op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a); - auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> { + auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> { switch (opcode->get().GetId()) { case OpCode::Id::HADD2_C: case OpCode::Id::HMUL2_C: @@ -48,17 +72,16 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { } }(); op_b = UnpackHalfFloat(op_b, type_b); - // redeclaration to avoid a bug in clang with reusing local bindings in lambdas - Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); + op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b); - Node value = [&]() { + Node value = [this, opcode, op_a, op_b = op_b] { switch (opcode->get().GetId()) { case OpCode::Id::HADD2_C: case OpCode::Id::HADD2_R: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt); + return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); case OpCode::Id::HMUL2_C: case OpCode::Id::HMUL2_R: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt); + return Operation(OperationCode::HMul, PRECISE, op_a, op_b); default: UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); return Immediate(0); diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 0f4c3103a..9af8c606d 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -249,8 +249,8 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { } case OpCode::Id::LEA_IMM: { const bool neg = instr.lea.imm.neg != 0; - return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), - GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), + return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), + Immediate(static_cast<u32>(instr.lea.imm.entry_a)), Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; } case OpCode::Id::LEA_RZ: { |