Diffstat (limited to 'src/video_core')
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h            |  2
-rw-r--r--  src/video_core/gpu_thread.cpp                         |  6
-rw-r--r--  src/video_core/gpu_thread.h                           |  4
-rw-r--r--  src/video_core/macro/macro.cpp                        |  6
-rw-r--r--  src/video_core/memory_manager.cpp                     | 36
-rw-r--r--  src/video_core/memory_manager.h                       | 12
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp      |  7
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp       | 85
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h         |  6
-rw-r--r--  src/video_core/texture_cache/format_lookup_table.cpp  | 62
-rw-r--r--  src/video_core/texture_cache/texture_cache.h          | 95
-rw-r--r--  src/video_core/textures/texture.cpp                   | 14
-rw-r--r--  src/video_core/textures/texture.h                     | 70
-rw-r--r--  src/video_core/video_core.cpp                         |  2
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.cpp        |  6
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.cpp       |  2
16 files changed, 229 insertions, 186 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 1f656ffa8..abdc593df 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1442,7 +1442,7 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
     }
     if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
         address_size =
-            static_cast<u32>(gpu_memory->MaxContinousRange(gpu_addr_begin, address_size));
+            static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, address_size));
     }
     const u32 size = address_size; // TODO: Analyze stride and number of vertices
     vertex_buffers[index] = Binding{
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 050b11874..3c5317777 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -31,8 +31,10 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
     auto current_context = context.Acquire();
     VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();
 
+    CommandDataContainer next;
+
     while (!stop_token.stop_requested()) {
-        CommandDataContainer next = state.queue.PopWait(stop_token);
+        state.queue.PopWait(next, stop_token);
         if (stop_token.stop_requested()) {
             break;
         }
@@ -116,7 +118,7 @@ u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
 
     std::unique_lock lk(state.write_lock);
     const u64 fence{++state.last_fence};
-    state.queue.Push(CommandDataContainer(std::move(command_data), fence, block));
+    state.queue.EmplaceWait(std::move(command_data), fence, block);
 
     if (block) {
         Common::CondvarWait(state.cv, lk, thread.get_stop_token(), [this, fence] {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 90bcb5958..43940bd6d 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -10,8 +10,8 @@
 #include <thread>
 #include <variant>
 
+#include "common/bounded_threadsafe_queue.h"
 #include "common/polyfill_thread.h"
-#include "common/threadsafe_queue.h"
 #include "video_core/framebuffer_config.h"
 
 namespace Tegra {
@@ -97,7 +97,7 @@ struct CommandDataContainer {
 
 /// Struct used to synchronize the GPU thread
 struct SynchState final {
-    using CommandQueue = Common::MPSCQueue<CommandDataContainer, true>;
+    using CommandQueue = Common::MPSCQueue<CommandDataContainer>;
     std::mutex write_lock;
     CommandQueue queue;
     u64 last_fence{};
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index 82ad0477d..905505ca1 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -6,7 +6,7 @@
 #include <optional>
 #include <span>
 
-#include <boost/container_hash/hash.hpp>
+#include "common/container_hash.h"
 
 #include <fstream>
 #include "common/assert.h"
@@ -89,7 +89,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
 
         if (!mid_method.has_value()) {
             cache_info.lle_program = Compile(macro_code->second);
-            cache_info.hash = boost::hash_value(macro_code->second);
+            cache_info.hash = Common::HashValue(macro_code->second);
             if (Settings::values.dump_macros) {
                 Dump(cache_info.hash, macro_code->second);
             }
@@ -100,7 +100,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
             code.resize(macro_cached.size() - rebased_method);
             std::memcpy(code.data(), macro_cached.data() + rebased_method,
                         code.size() * sizeof(u32));
-            cache_info.hash = boost::hash_value(code);
+            cache_info.hash = Common::HashValue(code);
             cache_info.lle_program = Compile(code);
             if (Settings::values.dump_macros) {
                 Dump(cache_info.hash, code);
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 83924475b..01fb5b546 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -22,7 +22,7 @@ std::atomic<size_t> MemoryManager::unique_identifier_generator{};
 
 MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
                              u64 page_bits_)
-    : system{system_}, memory{system.Memory()}, device_memory{system.DeviceMemory()},
+    : system{system_}, memory{system.ApplicationMemory()}, device_memory{system.DeviceMemory()},
      address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_},
      entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
                                           page_bits != big_page_bits ? page_bits : 0},
@@ -43,7 +43,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
 
     big_entries.resize(big_page_table_size / 32, 0);
     big_page_table_cpu.resize(big_page_table_size);
-    big_page_continous.resize(big_page_table_size / continous_bits, 0);
+    big_page_continuous.resize(big_page_table_size / continuous_bits, 0);
     entries.resize(page_table_size / 32, 0);
 }
@@ -85,17 +85,17 @@ PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const {
     return kind_map.GetValueAt(gpu_addr);
 }
 
-inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const {
-    const u64 entry_mask = big_page_continous[big_page_index / continous_bits];
-    const size_t sub_index = big_page_index % continous_bits;
+inline bool MemoryManager::IsBigPageContinuous(size_t big_page_index) const {
+    const u64 entry_mask = big_page_continuous[big_page_index / continuous_bits];
+    const size_t sub_index = big_page_index % continuous_bits;
     return ((entry_mask >> sub_index) & 0x1ULL) != 0;
 }
 
-inline void MemoryManager::SetBigPageContinous(size_t big_page_index, bool value) {
-    const u64 continous_mask = big_page_continous[big_page_index / continous_bits];
-    const size_t sub_index = big_page_index % continous_bits;
-    big_page_continous[big_page_index / continous_bits] =
-        (~(1ULL << sub_index) & continous_mask) | (value ? 1ULL << sub_index : 0);
+inline void MemoryManager::SetBigPageContinuous(size_t big_page_index, bool value) {
+    const u64 continuous_mask = big_page_continuous[big_page_index / continuous_bits];
+    const size_t sub_index = big_page_index % continuous_bits;
+    big_page_continuous[big_page_index / continuous_bits] =
+        (~(1ULL << sub_index) & continuous_mask) | (value ? 1ULL << sub_index : 0);
 }
 
 template <MemoryManager::EntryType entry_type>
@@ -140,7 +140,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
             const auto index = PageEntryIndex<true>(current_gpu_addr);
             const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits);
             big_page_table_cpu[index] = sub_value;
-            const bool is_continous = ([&] {
+            const bool is_continuous = ([&] {
                 uintptr_t base_ptr{
                     reinterpret_cast<uintptr_t>(memory.GetPointerSilent(current_cpu_addr))};
                 if (base_ptr == 0) {
@@ -156,7 +156,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
                 }
                 return true;
             })();
-            SetBigPageContinous(index, is_continous);
+            SetBigPageContinuous(index, is_continuous);
         }
         remaining_size -= big_page_size;
     }
@@ -378,7 +378,7 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
             if constexpr (is_safe) {
                 rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
             }
-            if (!IsBigPageContinous(page_index)) [[unlikely]] {
+            if (!IsBigPageContinuous(page_index)) [[unlikely]] {
                memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
            } else {
                u8* physical = memory.GetPointer(cpu_addr_base);
@@ -427,7 +427,7 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe
            if constexpr (is_safe) {
                rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which);
            }
-            if (!IsBigPageContinous(page_index)) [[unlikely]] {
+            if (!IsBigPageContinuous(page_index)) [[unlikely]] {
                memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount);
            } else {
                u8* physical = memory.GetPointer(cpu_addr_base);
@@ -512,7 +512,7 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size,
     return result;
 }
 
-size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const {
+size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const {
     std::optional<VAddr> old_page_addr{};
     size_t range_so_far = 0;
     bool result{false};
@@ -553,7 +553,7 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const {
 }
 
 size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const {
-    return kind_map.GetContinousSizeFrom(gpu_addr);
+    return kind_map.GetContinuousSizeFrom(gpu_addr);
 }
 
 void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
@@ -594,7 +594,7 @@ void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std
 bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
     if (GetEntry<true>(gpu_addr) == EntryType::Mapped) [[likely]] {
         size_t page_index = gpu_addr >> big_page_bits;
-        if (IsBigPageContinous(page_index)) [[likely]] {
+        if (IsBigPageContinuous(page_index)) [[likely]] {
             const std::size_t page{(page_index & big_page_mask) + size};
             return page <= big_page_size;
         }
@@ -608,7 +608,7 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
     return page <= Core::Memory::YUZU_PAGESIZE;
 }
 
-bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
+bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const {
     std::optional<VAddr> old_page_addr{};
     bool result{true};
     auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset,
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 51ae2de68..fbbe856c4 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -94,7 +94,7 @@ public:
     /**
      * Checks if a gpu region is mapped by a single range of cpu addresses.
      */
-    [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
+    [[nodiscard]] bool IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const;
 
     /**
      * Checks if a gpu region is mapped entirely.
@@ -123,7 +123,7 @@ public:
     bool IsMemoryDirty(GPUVAddr gpu_addr, size_t size,
                        VideoCommon::CacheType which = VideoCommon::CacheType::All) const;
 
-    size_t MaxContinousRange(GPUVAddr gpu_addr, size_t size) const;
+    size_t MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const;
 
     bool IsWithinGPUAddressRange(GPUVAddr gpu_addr) const {
         return gpu_addr < address_space_size;
@@ -158,8 +158,8 @@ private:
         }
     }
 
-    inline bool IsBigPageContinous(size_t big_page_index) const;
-    inline void SetBigPageContinous(size_t big_page_index, bool value);
+    inline bool IsBigPageContinuous(size_t big_page_index) const;
+    inline void SetBigPageContinuous(size_t big_page_index, bool value);
 
    template <bool is_gpu_address>
    void GetSubmappedRangeImpl(
@@ -213,10 +213,10 @@ private:
     Common::RangeMap<GPUVAddr, PTEKind> kind_map;
     Common::VirtualBuffer<u32> big_page_table_cpu;
-    std::vector<u64> big_page_continous;
+    std::vector<u64> big_page_continuous;
     std::vector<std::pair<VAddr, std::size_t>> page_stash{};
 
-    static constexpr size_t continous_bits = 64;
+    static constexpr size_t continuous_bits = 64;
 
     const size_t unique_identifier;
     std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index ca52e2389..5dce51be8 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -166,7 +166,7 @@ struct FormatTuple {
     {VK_FORMAT_R16G16_UINT, Attachable | Storage},     // R16G16_UINT
     {VK_FORMAT_R16G16_SINT, Attachable | Storage},     // R16G16_SINT
     {VK_FORMAT_R16G16_SNORM, Attachable | Storage},    // R16G16_SNORM
-    {VK_FORMAT_UNDEFINED},                             // R32G32B32_FLOAT
+    {VK_FORMAT_R32G32B32_SFLOAT},                      // R32G32B32_FLOAT
     {VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable},      // A8B8G8R8_SRGB
     {VK_FORMAT_R8G8_UNORM, Attachable | Storage},      // R8G8_UNORM
     {VK_FORMAT_R8G8_SNORM, Attachable | Storage},      // R8G8_SNORM
@@ -234,11 +234,6 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
                          PixelFormat pixel_format) {
     ASSERT(static_cast<size_t>(pixel_format) < std::size(tex_format_tuples));
     FormatTuple tuple = tex_format_tuples[static_cast<size_t>(pixel_format)];
-    if (tuple.format == VK_FORMAT_UNDEFINED) {
-        UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", pixel_format);
-        return FormatInfo{VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true};
-    }
-
     // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively
     if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
         const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index e03685af1..b264e6ada 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -47,14 +47,15 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
 Scheduler::~Scheduler() = default;
 
 void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+    // When flushing, we only send data to the worker thread; no waiting is necessary.
     SubmitExecution(signal_semaphore, wait_semaphore);
     AllocateNewContext();
 }
 
 void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+    // When finishing, we need to wait for the submission to have executed on the device.
     const u64 presubmit_tick = CurrentTick();
     SubmitExecution(signal_semaphore, wait_semaphore);
-    WaitWorker();
     Wait(presubmit_tick);
     AllocateNewContext();
 }
@@ -63,8 +64,14 @@ void Scheduler::WaitWorker() {
     MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
     DispatchWork();
 
-    std::unique_lock lock{work_mutex};
-    wait_cv.wait(lock, [this] { return work_queue.empty(); });
+    // Ensure the queue is drained.
+    {
+        std::unique_lock ql{queue_mutex};
+        event_cv.wait(ql, [this] { return work_queue.empty(); });
+    }
+
+    // Now wait for execution to finish.
+    std::scoped_lock el{execution_mutex};
 }
 
 void Scheduler::DispatchWork() {
@@ -72,10 +79,10 @@ void Scheduler::DispatchWork() {
         return;
     }
     {
-        std::scoped_lock lock{work_mutex};
+        std::scoped_lock ql{queue_mutex};
         work_queue.push(std::move(chunk));
     }
-    work_cv.notify_one();
+    event_cv.notify_all();
     AcquireNewChunk();
 }
@@ -137,30 +144,55 @@ bool Scheduler::UpdateRescaling(bool is_rescaling) {
 
 void Scheduler::WorkerThread(std::stop_token stop_token) {
     Common::SetCurrentThreadName("VulkanWorker");
-    do {
+
+    const auto TryPopQueue{[this](auto& work) -> bool {
+        if (work_queue.empty()) {
+            return false;
+        }
+
+        work = std::move(work_queue.front());
+        work_queue.pop();
+        event_cv.notify_all();
+        return true;
+    }};
+
+    while (!stop_token.stop_requested()) {
         std::unique_ptr<CommandChunk> work;
-        bool has_submit{false};
+
         {
-            std::unique_lock lock{work_mutex};
-            if (work_queue.empty()) {
-                wait_cv.notify_all();
-            }
-            Common::CondvarWait(work_cv, lock, stop_token, [&] { return !work_queue.empty(); });
+            std::unique_lock lk{queue_mutex};
+
+            // Wait for work.
+            Common::CondvarWait(event_cv, lk, stop_token, [&] { return TryPopQueue(work); });
+
+            // If we've been asked to stop, we're done.
             if (stop_token.stop_requested()) {
-                continue;
+                return;
             }
-            work = std::move(work_queue.front());
-            work_queue.pop();
-            has_submit = work->HasSubmit();
+
+            // Exchange lock ownership so that we take the execution lock before
+            // the queue lock goes out of scope. This allows us to force execution
+            // to complete in the next step.
+            std::exchange(lk, std::unique_lock{execution_mutex});
+
+            // Perform the work, tracking whether the chunk was a submission
+            // before executing.
+            const bool has_submit = work->HasSubmit();
             work->ExecuteAll(current_cmdbuf);
+
+            // If the chunk was a submission, reallocate the command buffer.
+            if (has_submit) {
+                AllocateWorkerCommandBuffer();
+            }
         }
-        if (has_submit) {
-            AllocateWorkerCommandBuffer();
+
+        {
+            std::scoped_lock rl{reserve_mutex};
+
+            // Recycle the chunk back to the reserve.
+            chunk_reserve.emplace_back(std::move(work));
         }
-        std::scoped_lock reserve_lock{reserve_mutex};
-        chunk_reserve.push_back(std::move(work));
-    } while (!stop_token.stop_requested());
+    }
 }
 
 void Scheduler::AllocateWorkerCommandBuffer() {
@@ -289,13 +321,16 @@ void Scheduler::EndRenderPass() {
 }
 
 void Scheduler::AcquireNewChunk() {
-    std::scoped_lock lock{reserve_mutex};
+    std::scoped_lock rl{reserve_mutex};
+
     if (chunk_reserve.empty()) {
+        // If we don't have anything reserved, we need to make a new chunk.
         chunk = std::make_unique<CommandChunk>();
-        return;
+    } else {
+        // Otherwise, we can just take from the reserve.
+        chunk = std::move(chunk_reserve.back());
+        chunk_reserve.pop_back();
     }
-    chunk = std::move(chunk_reserve.back());
-    chunk_reserve.pop_back();
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index bd4cb0f7e..8d75ce987 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -232,10 +232,10 @@ private:
     std::queue<std::unique_ptr<CommandChunk>> work_queue;
     std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
+    std::mutex execution_mutex;
     std::mutex reserve_mutex;
-    std::mutex work_mutex;
-    std::condition_variable_any work_cv;
-    std::condition_variable wait_cv;
+    std::mutex queue_mutex;
+    std::condition_variable_any event_cv;
     std::jthread worker_thread;
 };
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 08aa8ca33..5fc2b2fec 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -42,15 +42,15 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
                                        ComponentType blue, ComponentType alpha,
                                        bool is_srgb) noexcept {
     switch (Hash(format, red, green, blue, alpha, is_srgb)) {
-    case Hash(TextureFormat::A8R8G8B8, UNORM):
+    case Hash(TextureFormat::A8B8G8R8, UNORM):
         return PixelFormat::A8B8G8R8_UNORM;
-    case Hash(TextureFormat::A8R8G8B8, SNORM):
+    case Hash(TextureFormat::A8B8G8R8, SNORM):
         return PixelFormat::A8B8G8R8_SNORM;
-    case Hash(TextureFormat::A8R8G8B8, UINT):
+    case Hash(TextureFormat::A8B8G8R8, UINT):
         return PixelFormat::A8B8G8R8_UINT;
-    case Hash(TextureFormat::A8R8G8B8, SINT):
+    case Hash(TextureFormat::A8B8G8R8, SINT):
         return PixelFormat::A8B8G8R8_SINT;
-    case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB):
+    case Hash(TextureFormat::A8B8G8R8, UNORM, SRGB):
         return PixelFormat::A8B8G8R8_SRGB;
     case Hash(TextureFormat::B5G6R5, UNORM):
         return PixelFormat::B5G6R5_UNORM;
@@ -74,13 +74,13 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
         return PixelFormat::R8_UINT;
     case Hash(TextureFormat::R8, SINT):
         return PixelFormat::R8_SINT;
-    case Hash(TextureFormat::R8G8, UNORM):
+    case Hash(TextureFormat::G8R8, UNORM):
         return PixelFormat::R8G8_UNORM;
-    case Hash(TextureFormat::R8G8, SNORM):
+    case Hash(TextureFormat::G8R8, SNORM):
         return PixelFormat::R8G8_SNORM;
-    case Hash(TextureFormat::R8G8, UINT):
+    case Hash(TextureFormat::G8R8, UINT):
         return PixelFormat::R8G8_UINT;
-    case Hash(TextureFormat::R8G8, SINT):
+    case Hash(TextureFormat::G8R8, SINT):
         return PixelFormat::R8G8_SINT;
     case Hash(TextureFormat::R16G16B16A16, FLOAT):
         return PixelFormat::R16G16B16A16_FLOAT;
@@ -136,49 +136,49 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
         return PixelFormat::R32_SINT;
     case Hash(TextureFormat::E5B9G9R9, FLOAT):
         return PixelFormat::E5B9G9R9_FLOAT;
-    case Hash(TextureFormat::D32, FLOAT):
+    case Hash(TextureFormat::Z32, FLOAT):
         return PixelFormat::D32_FLOAT;
-    case Hash(TextureFormat::D16, UNORM):
+    case Hash(TextureFormat::Z16, UNORM):
         return PixelFormat::D16_UNORM;
-    case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR):
+    case Hash(TextureFormat::Z24S8, UINT, UNORM, UNORM, UNORM, LINEAR):
         return PixelFormat::S8_UINT_D24_UNORM;
-    case Hash(TextureFormat::S8D24, UINT, UNORM, UINT, UINT, LINEAR):
+    case Hash(TextureFormat::Z24S8, UINT, UNORM, UINT, UINT, LINEAR):
         return PixelFormat::S8_UINT_D24_UNORM;
-    case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):
+    case Hash(TextureFormat::G24R8, UINT, UNORM, UNORM, UNORM, LINEAR):
         return PixelFormat::S8_UINT_D24_UNORM;
-    case Hash(TextureFormat::D24S8, UNORM, UINT, UINT, UINT, LINEAR):
+    case Hash(TextureFormat::S8Z24, UNORM, UINT, UINT, UINT, LINEAR):
         return PixelFormat::D24_UNORM_S8_UINT;
-    case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
+    case Hash(TextureFormat::Z32_X24S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
         return PixelFormat::D32_FLOAT_S8_UINT;
-    case Hash(TextureFormat::R32_B24G8, FLOAT, UINT, UNORM, UNORM, LINEAR):
+    case Hash(TextureFormat::R32B24G8, FLOAT, UINT, UNORM, UNORM, LINEAR):
         return PixelFormat::D32_FLOAT_S8_UINT;
-    case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR):
+    case Hash(TextureFormat::DXT1, UNORM, LINEAR):
         return PixelFormat::BC1_RGBA_UNORM;
-    case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB):
+    case Hash(TextureFormat::DXT1, UNORM, SRGB):
         return PixelFormat::BC1_RGBA_SRGB;
-    case Hash(TextureFormat::BC2, UNORM, LINEAR):
+    case Hash(TextureFormat::DXT23, UNORM, LINEAR):
         return PixelFormat::BC2_UNORM;
-    case Hash(TextureFormat::BC2, UNORM, SRGB):
+    case Hash(TextureFormat::DXT23, UNORM, SRGB):
         return PixelFormat::BC2_SRGB;
-    case Hash(TextureFormat::BC3, UNORM, LINEAR):
+    case Hash(TextureFormat::DXT45, UNORM, LINEAR):
         return PixelFormat::BC3_UNORM;
-    case Hash(TextureFormat::BC3, UNORM, SRGB):
+    case Hash(TextureFormat::DXT45, UNORM, SRGB):
         return PixelFormat::BC3_SRGB;
-    case Hash(TextureFormat::BC4, UNORM):
+    case Hash(TextureFormat::DXN1, UNORM):
         return PixelFormat::BC4_UNORM;
-    case Hash(TextureFormat::BC4, SNORM):
+    case Hash(TextureFormat::DXN1, SNORM):
         return PixelFormat::BC4_SNORM;
-    case Hash(TextureFormat::BC5, UNORM):
+    case Hash(TextureFormat::DXN2, UNORM):
         return PixelFormat::BC5_UNORM;
-    case Hash(TextureFormat::BC5, SNORM):
+    case Hash(TextureFormat::DXN2, SNORM):
         return PixelFormat::BC5_SNORM;
-    case Hash(TextureFormat::BC7, UNORM, LINEAR):
+    case Hash(TextureFormat::BC7U, UNORM, LINEAR):
         return PixelFormat::BC7_UNORM;
-    case Hash(TextureFormat::BC7, UNORM, SRGB):
+    case Hash(TextureFormat::BC7U, UNORM, SRGB):
         return PixelFormat::BC7_SRGB;
-    case Hash(TextureFormat::BC6H_SFLOAT, FLOAT):
+    case Hash(TextureFormat::BC6H_S16, FLOAT):
         return PixelFormat::BC6H_SFLOAT;
-    case Hash(TextureFormat::BC6H_UFLOAT, FLOAT):
+    case Hash(TextureFormat::BC6H_U16, FLOAT):
         return PixelFormat::BC6H_UFLOAT;
     case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR):
         return PixelFormat::ASTC_2D_4X4_UNORM;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c09eecd1a..ed5c768d8 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1176,13 +1176,13 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
     const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
     const bool broken_views = runtime.HasBrokenTextureViewFormats();
     const bool native_bgr = runtime.HasNativeBgr();
-    std::vector<ImageId> overlap_ids;
+    boost::container::small_vector<ImageId, 4> overlap_ids;
     std::unordered_set<ImageId> overlaps_found;
-    std::vector<ImageId> left_aliased_ids;
-    std::vector<ImageId> right_aliased_ids;
+    boost::container::small_vector<ImageId, 4> left_aliased_ids;
+    boost::container::small_vector<ImageId, 4> right_aliased_ids;
     std::unordered_set<ImageId> ignore_textures;
-    std::vector<ImageId> bad_overlap_ids;
-    std::vector<ImageId> all_siblings;
+    boost::container::small_vector<ImageId, 4> bad_overlap_ids;
+    boost::container::small_vector<ImageId, 4> all_siblings;
     const bool this_is_linear = info.type == ImageType::Linear;
     const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
         if (True(overlap.flags & ImageFlagBits::Remapped)) {
@@ -1269,7 +1269,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
     const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
     Image& new_image = slot_images[new_image_id];
 
-    if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
+    if (!gpu_memory->IsContinuousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
         new_image.flags |= ImageFlagBits::Sparse;
     }
@@ -1298,16 +1298,16 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
         Image& overlap = slot_images[overlap_id];
         if (True(overlap.flags & ImageFlagBits::GpuModified)) {
             new_image.flags |= ImageFlagBits::GpuModified;
-        }
-        const auto& resolution = Settings::values.resolution_info;
-        const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
-        const u32 up_scale = can_rescale ? resolution.up_scale : 1;
-        const u32 down_shift = can_rescale ? resolution.down_shift : 0;
-        auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
-        if (overlap.info.num_samples != new_image.info.num_samples) {
-            runtime.CopyImageMSAA(new_image, overlap, std::move(copies));
-        } else {
-            runtime.CopyImage(new_image, overlap, std::move(copies));
+            const auto& resolution = Settings::values.resolution_info;
+            const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
+            const u32 up_scale = can_rescale ? resolution.up_scale : 1;
+            const u32 down_shift = can_rescale ? resolution.down_shift : 0;
+            auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
+            if (overlap.info.num_samples != new_image.info.num_samples) {
+                runtime.CopyImageMSAA(new_image, overlap, std::move(copies));
+            } else {
+                runtime.CopyImage(new_image, overlap, std::move(copies));
+            }
         }
         if (True(overlap.flags & ImageFlagBits::Tracked)) {
             UntrackImage(overlap, overlap_id);
@@ -1616,37 +1616,38 @@ void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s
         return;
     }
     auto& gpu_page_table = gpu_page_table_storage[*storage_id];
-    ForEachGPUPage(gpu_addr, size, [this, gpu_page_table, &images, gpu_addr, size, func](u64 page) {
-        const auto it = gpu_page_table.find(page);
-        if (it == gpu_page_table.end()) {
-            if constexpr (BOOL_BREAK) {
-                return false;
-            } else {
-                return;
-            }
-        }
-        for (const ImageId image_id : it->second) {
-            Image& image = slot_images[image_id];
-            if (True(image.flags & ImageFlagBits::Picked)) {
-                continue;
-            }
-            if (!image.OverlapsGPU(gpu_addr, size)) {
-                continue;
-            }
-            image.flags |= ImageFlagBits::Picked;
-            images.push_back(image_id);
-            if constexpr (BOOL_BREAK) {
-                if (func(image_id, image)) {
-                    return true;
-                }
-            } else {
-                func(image_id, image);
-            }
-        }
-        if constexpr (BOOL_BREAK) {
-            return false;
-        }
-    });
+    ForEachGPUPage(gpu_addr, size,
+                   [this, &gpu_page_table, &images, gpu_addr, size, func](u64 page) {
+                       const auto it = gpu_page_table.find(page);
+                       if (it == gpu_page_table.end()) {
+                           if constexpr (BOOL_BREAK) {
+                               return false;
+                           } else {
+                               return;
+                           }
+                       }
+                       for (const ImageId image_id : it->second) {
+                           Image& image = slot_images[image_id];
+                           if (True(image.flags & ImageFlagBits::Picked)) {
+                               continue;
+                           }
+                           if (!image.OverlapsGPU(gpu_addr, size)) {
+                               continue;
+                           }
+                           image.flags |= ImageFlagBits::Picked;
+                           images.push_back(image_id);
+                           if constexpr (BOOL_BREAK) {
+                               if (func(image_id, image)) {
+                                   return true;
+                               }
+                           } else {
+                               func(image_id, image);
+                           }
+                       }
+                       if constexpr (BOOL_BREAK) {
+                           return false;
+                       }
+                   });
     for (const ImageId image_id : images) {
         slot_images[image_id].flags &= ~ImageFlagBits::Picked;
     }
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index 26649aebf..4a80a59f9 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -14,7 +14,7 @@ namespace Tegra::Texture {
 
 namespace {
 
-constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
+[[maybe_unused]] constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
     0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f,
     0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f,
     0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f,
@@ -52,11 +52,13 @@ constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
 } // Anonymous namespace
 
 std::array<float, 4> TSCEntry::BorderColor() const noexcept {
-    if (!srgb_conversion) {
-        return border_color;
-    }
-    return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g],
-            SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
+    // TODO: Handle SRGB correctly. Using this breaks shadows in some games (Xenoblade).
+    // if (!srgb_conversion) {
+    //     return border_color;
+    //}
+    // return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g],
+    //         SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
+    return border_color;
 }
 
 float TSCEntry::MaxAnisotropy() const noexcept {
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 7c4553a53..7e5837b20 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -15,26 +15,26 @@ enum class TextureFormat : u32 {
     R32G32B32 = 0x02,
     R16G16B16A16 = 0x03,
     R32G32 = 0x04,
-    R32_B24G8 = 0x05,
+    R32B24G8 = 0x05,
     ETC2_RGB = 0x06,
     X8B8G8R8 = 0x07,
-    A8R8G8B8 = 0x08,
+    A8B8G8R8 = 0x08,
     A2B10G10R10 = 0x09,
     ETC2_RGB_PTA = 0x0a,
     ETC2_RGBA = 0x0b,
     R16G16 = 0x0c,
-    R24G8 = 0x0d,
-    R8G24 = 0x0e,
+    G8R24 = 0x0d,
+    G24R8 = 0x0e,
     R32 = 0x0f,
-    BC6H_SFLOAT = 0x10,
-    BC6H_UFLOAT = 0x11,
+    BC6H_S16 = 0x10,
+    BC6H_U16 = 0x11,
     A4B4G4R4 = 0x12,
     A5B5G5R1 = 0x13,
     A1B5G5R5 = 0x14,
     B5G6R5 = 0x15,
     B6G5R5 = 0x16,
-    BC7 = 0x17,
-    R8G8 = 0x18,
+    BC7U = 0x17,
+    G8R8 = 0x18,
     EAC = 0x19,
     EACX2 = 0x1a,
     R16 = 0x1b,
@@ -46,33 +46,33 @@ enum class TextureFormat : u32 {
     B10G11R11 = 0x21,
     G8B8G8R8 = 0x22,
     B8G8R8G8 = 0x23,
-    BC1_RGBA = 0x24,
-    BC2 = 0x25,
-    BC3 = 0x26,
-    BC4 = 0x27,
-    BC5 = 0x28,
-    S8D24 = 0x29,
-    X8D24 = 0x2a,
-    D24S8 = 0x2b,
-    X4V4D24__COV4R4V = 0x2c,
-    X4V4D24__COV8R8V = 0x2d,
-    V8D24__COV4R12V = 0x2e,
-    D32 = 0x2f,
-    D32S8 = 0x30,
-    X8D24_X20V4S8__COV4R4V = 0x31,
-    X8D24_X20V4S8__COV8R8V = 0x32,
-    D32_X20V4X8__COV4R4V = 0x33,
-    D32_X20V4X8__COV8R8V = 0x34,
-    D32_X20V4S8__COV4R4V = 0x35,
-    D32_X20V4S8__COV8R8V = 0x36,
-    X8D24_X16V8S8__COV4R12V = 0x37,
-    D32_X16V8X8__COV4R12V = 0x38,
-    D32_X16V8S8__COV4R12V = 0x39,
-    D16 = 0x3a,
-    V8D24__COV8R24V = 0x3b,
-    X8D24_X16V8S8__COV8R24V = 0x3c,
-    D32_X16V8X8__COV8R24V = 0x3d,
-    D32_X16V8S8__COV8R24V = 0x3e,
+    DXT1 = 0x24,
+    DXT23 = 0x25,
+    DXT45 = 0x26,
+    DXN1 = 0x27,
+    DXN2 = 0x28,
+    Z24S8 = 0x29,
+    X8Z24 = 0x2a,
+    S8Z24 = 0x2b,
+    X4V4Z24__COV4R4V = 0x2c,
+    X4V4Z24__COV8R8V = 0x2d,
+    V8Z24__COV4R12V = 0x2e,
+    Z32 = 0x2f,
+    Z32_X24S8 = 0x30,
+    X8Z24_X20V4S8__COV4R4V = 0x31,
+    X8Z24_X20V4S8__COV8R8V = 0x32,
+    Z32_X20V4X8__COV4R4V = 0x33,
+    Z32_X20V4X8__COV8R8V = 0x34,
+    Z32_X20V4S8__COV4R4V = 0x35,
+    Z32_X20V4S8__COV8R8V = 0x36,
+    X8Z24_X16V8S8__COV4R12V = 0x37,
+    Z32_X16V8X8__COV4R12V = 0x38,
+    Z32_X16V8S8__COV4R12V = 0x39,
+    Z16 = 0x3a,
+    V8Z24__COV8R24V = 0x3b,
+    X8Z24_X16V8S8__COV8R24V = 0x3c,
+    Z32_X16V8X8__COV8R24V = 0x3d,
+    Z32_X16V8S8__COV8R24V = 0x3e,
     ASTC_2D_4X4 = 0x40,
     ASTC_2D_5X5 = 0x41,
     ASTC_2D_6X6 = 0x42,
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index fedb4a7bb..b42d48416 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -18,7 +18,7 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
     Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
     std::unique_ptr<Core::Frontend::GraphicsContext> context) {
     auto& telemetry_session = system.TelemetrySession();
-    auto& cpu_memory = system.Memory();
+    auto& cpu_memory = system.ApplicationMemory();
 
     switch (Settings::values.renderer_backend.GetValue()) {
     case Settings::RendererBackend::OpenGL:
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index df348af55..6f288b3f8 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -401,6 +401,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
             loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
         }
     }
+    if (extensions.extended_dynamic_state3 && is_radv) {
+        LOG_WARNING(Render_Vulkan, "RADV has broken extendedDynamicState3ColorBlendEquation");
+        features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
+        features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
+        dynamic_state3_blending = false;
+    }
     if (extensions.vertex_input_dynamic_state && is_radv) {
         // TODO(ameerj): Blacklist only offending driver versions
         // TODO(ameerj): Confirm if RDNA1 is affected
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 486d4dfaf..336f53700 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -375,6 +375,8 @@ const char* ToString(VkResult result) noexcept {
         return "VK_RESULT_MAX_ENUM";
     case VkResult::VK_ERROR_COMPRESSION_EXHAUSTED_EXT:
         return "VK_ERROR_COMPRESSION_EXHAUSTED_EXT";
+    case VkResult::VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT:
+        return "VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT";
     }
     return "Unknown";
 }
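
Illustration (not part of the commit): the vk_scheduler.cpp change above replaces the old work_mutex/work_cv/wait_cv trio with a queue_mutex that only guards the work queue, an execution_mutex that is held while a chunk executes, and a single event_cv, so that WaitWorker() can first drain the queue and then briefly take the execution mutex to know the in-flight chunk has also finished running. The sketch below is a hypothetical, stripped-down stand-in for that pattern, not yuzu's Scheduler: it uses plain C++20 facilities (std::jthread, std::condition_variable_any with a stop_token) in place of Common::CondvarWait and CommandChunk, and the Worker/Push/WaitIdle names are invented for the example.

#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <stop_token>
#include <thread>
#include <utility>

class Worker {
public:
    Worker() : thread{[this](std::stop_token token) { Loop(token); }} {}

    void Push(std::function<void()> work) {
        {
            std::scoped_lock lock{queue_mutex};
            queue.push(std::move(work));
        }
        event_cv.notify_all();
    }

    void WaitIdle() {
        // First drain the queue; the worker notifies event_cv after every pop.
        {
            std::unique_lock lock{queue_mutex};
            event_cv.wait(lock, [this] { return queue.empty(); });
        }
        // Then take the execution mutex, which the worker holds while running an item,
        // so returning from here means the last popped item has finished as well.
        std::scoped_lock lock{execution_mutex};
    }

private:
    void Loop(std::stop_token token) {
        while (!token.stop_requested()) {
            std::function<void()> work;
            std::unique_lock lock{queue_mutex};
            // Wait for work or a stop request; notify waiters once an item is taken.
            const bool has_work = event_cv.wait(lock, token, [&] {
                if (queue.empty()) {
                    return false;
                }
                work = std::move(queue.front());
                queue.pop();
                event_cv.notify_all();
                return true;
            });
            if (!has_work) {
                return; // Stop requested while waiting.
            }
            // Grab the execution mutex before releasing the queue mutex, mirroring the
            // std::exchange trick in the scheduler change, then run the item outside
            // the queue lock.
            std::exchange(lock, std::unique_lock{execution_mutex});
            work();
        }
    }

    std::mutex queue_mutex;
    std::mutex execution_mutex;
    std::condition_variable_any event_cv;
    std::queue<std::function<void()>> queue;
    std::jthread thread;
};

The point of the two-mutex split is that producers calling Push() are never blocked behind a long-running work item, while WaitIdle() still has a cheap way to wait for execution, not just queue emptiness, to complete.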