Diffstat (limited to 'src/video_core')
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h             2
-rw-r--r--  src/video_core/gpu_thread.cpp                          6
-rw-r--r--  src/video_core/gpu_thread.h                            4
-rw-r--r--  src/video_core/macro/macro.cpp                         6
-rw-r--r--  src/video_core/memory_manager.cpp                     36
-rw-r--r--  src/video_core/memory_manager.h                       12
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp       7
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp       85
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h          6
-rw-r--r--  src/video_core/texture_cache/format_lookup_table.cpp  62
-rw-r--r--  src/video_core/texture_cache/texture_cache.h          95
-rw-r--r--  src/video_core/textures/texture.cpp                   14
-rw-r--r--  src/video_core/textures/texture.h                     70
-rw-r--r--  src/video_core/video_core.cpp                          2
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.cpp         6
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.cpp        2
16 files changed, 229 insertions, 186 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 1f656ffa8..abdc593df 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1442,7 +1442,7 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
}
if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
address_size =
- static_cast<u32>(gpu_memory->MaxContinousRange(gpu_addr_begin, address_size));
+ static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, address_size));
}
const u32 size = address_size; // TODO: Analyze stride and number of vertices
vertex_buffers[index] = Binding{
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 050b11874..3c5317777 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -31,8 +31,10 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
auto current_context = context.Acquire();
VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();
+ CommandDataContainer next;
+
while (!stop_token.stop_requested()) {
- CommandDataContainer next = state.queue.PopWait(stop_token);
+ state.queue.PopWait(next, stop_token);
if (stop_token.stop_requested()) {
break;
}
@@ -116,7 +118,7 @@ u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
std::unique_lock lk(state.write_lock);
const u64 fence{++state.last_fence};
- state.queue.Push(CommandDataContainer(std::move(command_data), fence, block));
+ state.queue.EmplaceWait(std::move(command_data), fence, block);
if (block) {
Common::CondvarWait(state.cv, lk, thread.get_stop_token(), [this, fence] {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 90bcb5958..43940bd6d 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -10,8 +10,8 @@
#include <thread>
#include <variant>
+#include "common/bounded_threadsafe_queue.h"
#include "common/polyfill_thread.h"
-#include "common/threadsafe_queue.h"
#include "video_core/framebuffer_config.h"
namespace Tegra {
@@ -97,7 +97,7 @@ struct CommandDataContainer {
/// Struct used to synchronize the GPU thread
struct SynchState final {
- using CommandQueue = Common::MPSCQueue<CommandDataContainer, true>;
+ using CommandQueue = Common::MPSCQueue<CommandDataContainer>;
std::mutex write_lock;
CommandQueue queue;
u64 last_fence{};
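Reviewer note: the gpu_thread hunks above move the GPU command queue from the old Common::MPSCQueue interface to the bounded queue in common/bounded_threadsafe_queue.h: PopWait now fills an out-parameter instead of returning by value, and EmplaceWait constructs the container in place, blocking while the queue is full. A simplified single-slot sketch of that interface shape; the names PopWait/EmplaceWait match the diff, but the body below is purely illustrative (the real queue is a multi-producer ring buffer):

```cpp
#include <condition_variable>
#include <mutex>
#include <optional>
#include <stop_token>
#include <utility>

template <typename T>
class BoundedQueueSketch {
public:
    // Blocks while the queue is full, then constructs in place.
    template <typename... Args>
    void EmplaceWait(Args&&... args) {
        std::unique_lock lk{mutex};
        not_full.wait(lk, [this] { return !slot.has_value(); });
        slot.emplace(std::forward<Args>(args)...);
        not_empty.notify_one();
    }

    // Fills the out-parameter; returns early if stop is requested, so the
    // caller re-checks the token (as RunThread does above).
    void PopWait(T& out, std::stop_token token) {
        std::unique_lock lk{mutex};
        not_empty.wait(lk, token, [this] { return slot.has_value(); });
        if (!slot) {
            return; // stop requested before any work arrived
        }
        out = std::move(*slot);
        slot.reset();
        not_full.notify_one();
    }

private:
    std::mutex mutex;
    std::condition_variable_any not_empty;
    std::condition_variable not_full;
    std::optional<T> slot; // stand-in for a bounded ring of CommandDataContainer
};
```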
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index 82ad0477d..905505ca1 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -6,7 +6,7 @@
#include <optional>
#include <span>
-#include <boost/container_hash/hash.hpp>
+#include "common/container_hash.h"
#include <fstream>
#include "common/assert.h"
@@ -89,7 +89,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
if (!mid_method.has_value()) {
cache_info.lle_program = Compile(macro_code->second);
- cache_info.hash = boost::hash_value(macro_code->second);
+ cache_info.hash = Common::HashValue(macro_code->second);
if (Settings::values.dump_macros) {
Dump(cache_info.hash, macro_code->second);
}
@@ -100,7 +100,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
code.resize(macro_cached.size() - rebased_method);
std::memcpy(code.data(), macro_cached.data() + rebased_method,
code.size() * sizeof(u32));
- cache_info.hash = boost::hash_value(code);
+ cache_info.hash = Common::HashValue(code);
cache_info.lle_program = Compile(code);
if (Settings::values.dump_macros) {
Dump(cache_info.hash, code);
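Reviewer note: Common::HashValue replaces boost::hash_value here, dropping the Boost.ContainerHash include from this path while keeping the same "hash a vector of u32" use. A minimal stand-in, under the assumption that the helper in common/container_hash.h follows the familiar boost-style hash_range combine (an assumption, not the verified implementation):

```cpp
#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

// Hypothetical reimplementation for illustration only.
std::size_t HashValue(const std::vector<std::uint32_t>& code) {
    std::size_t seed = 0;
    for (const std::uint32_t word : code) {
        // Classic boost-style hash_combine step.
        seed ^= std::hash<std::uint32_t>{}(word) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    }
    return seed;
}
```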
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 83924475b..01fb5b546 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -22,7 +22,7 @@ std::atomic<size_t> MemoryManager::unique_identifier_generator{};
MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
u64 page_bits_)
- : system{system_}, memory{system.Memory()}, device_memory{system.DeviceMemory()},
+ : system{system_}, memory{system.ApplicationMemory()}, device_memory{system.DeviceMemory()},
address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_},
entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
page_bits != big_page_bits ? page_bits : 0},
@@ -43,7 +43,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
big_entries.resize(big_page_table_size / 32, 0);
big_page_table_cpu.resize(big_page_table_size);
- big_page_continous.resize(big_page_table_size / continous_bits, 0);
+ big_page_continuous.resize(big_page_table_size / continuous_bits, 0);
entries.resize(page_table_size / 32, 0);
}
@@ -85,17 +85,17 @@ PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const {
return kind_map.GetValueAt(gpu_addr);
}
-inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const {
- const u64 entry_mask = big_page_continous[big_page_index / continous_bits];
- const size_t sub_index = big_page_index % continous_bits;
+inline bool MemoryManager::IsBigPageContinuous(size_t big_page_index) const {
+ const u64 entry_mask = big_page_continuous[big_page_index / continuous_bits];
+ const size_t sub_index = big_page_index % continuous_bits;
return ((entry_mask >> sub_index) & 0x1ULL) != 0;
}
-inline void MemoryManager::SetBigPageContinous(size_t big_page_index, bool value) {
- const u64 continous_mask = big_page_continous[big_page_index / continous_bits];
- const size_t sub_index = big_page_index % continous_bits;
- big_page_continous[big_page_index / continous_bits] =
- (~(1ULL << sub_index) & continous_mask) | (value ? 1ULL << sub_index : 0);
+inline void MemoryManager::SetBigPageContinuous(size_t big_page_index, bool value) {
+ const u64 continuous_mask = big_page_continuous[big_page_index / continuous_bits];
+ const size_t sub_index = big_page_index % continuous_bits;
+ big_page_continuous[big_page_index / continuous_bits] =
+ (~(1ULL << sub_index) & continuous_mask) | (value ? 1ULL << sub_index : 0);
}
template <MemoryManager::EntryType entry_type>
@@ -140,7 +140,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
const auto index = PageEntryIndex<true>(current_gpu_addr);
const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits);
big_page_table_cpu[index] = sub_value;
- const bool is_continous = ([&] {
+ const bool is_continuous = ([&] {
uintptr_t base_ptr{
reinterpret_cast<uintptr_t>(memory.GetPointerSilent(current_cpu_addr))};
if (base_ptr == 0) {
@@ -156,7 +156,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
}
return true;
})();
- SetBigPageContinous(index, is_continous);
+ SetBigPageContinuous(index, is_continuous);
}
remaining_size -= big_page_size;
}
@@ -378,7 +378,7 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
if constexpr (is_safe) {
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
}
- if (!IsBigPageContinous(page_index)) [[unlikely]] {
+ if (!IsBigPageContinuous(page_index)) [[unlikely]] {
memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
} else {
u8* physical = memory.GetPointer(cpu_addr_base);
@@ -427,7 +427,7 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe
if constexpr (is_safe) {
rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which);
}
- if (!IsBigPageContinous(page_index)) [[unlikely]] {
+ if (!IsBigPageContinuous(page_index)) [[unlikely]] {
memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount);
} else {
u8* physical = memory.GetPointer(cpu_addr_base);
@@ -512,7 +512,7 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size,
return result;
}
-size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const {
+size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const {
std::optional<VAddr> old_page_addr{};
size_t range_so_far = 0;
bool result{false};
@@ -553,7 +553,7 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const {
}
size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const {
- return kind_map.GetContinousSizeFrom(gpu_addr);
+ return kind_map.GetContinuousSizeFrom(gpu_addr);
}
void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
@@ -594,7 +594,7 @@ void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std
bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
if (GetEntry<true>(gpu_addr) == EntryType::Mapped) [[likely]] {
size_t page_index = gpu_addr >> big_page_bits;
- if (IsBigPageContinous(page_index)) [[likely]] {
+ if (IsBigPageContinuous(page_index)) [[likely]] {
const std::size_t page{(page_index & big_page_mask) + size};
return page <= big_page_size;
}
@@ -608,7 +608,7 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
return page <= Core::Memory::YUZU_PAGESIZE;
}
-bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
+bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const {
std::optional<VAddr> old_page_addr{};
bool result{true};
auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset,
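Reviewer note: beyond the continous → continuous spelling fix, the memory_manager hunks show how the mapper tracks, per big page, whether the backing CPU memory is physically contiguous, so ReadBlockImpl/WriteBlockImpl can take the single-pointer fast path instead of a page-by-page copy. A self-contained sketch of that one-bit-per-page packing (64 flags per u64 word, same masking pattern as SetBigPageContinuous above):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

class ContinuityBitmap {
public:
    explicit ContinuityBitmap(std::size_t num_big_pages)
        : bits((num_big_pages + 63) / 64, 0) {}

    bool Get(std::size_t index) const {
        return ((bits[index / 64] >> (index % 64)) & 1ULL) != 0;
    }

    void Set(std::size_t index, bool value) {
        const std::uint64_t mask = bits[index / 64];
        const std::size_t sub = index % 64;
        // Clear the bit, then OR in the new value; this mirrors
        // MemoryManager::SetBigPageContinuous.
        bits[index / 64] = (~(1ULL << sub) & mask) | (value ? 1ULL << sub : 0);
    }

private:
    std::vector<std::uint64_t> bits; // one u64 covers 64 big pages
};
```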
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 51ae2de68..fbbe856c4 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -94,7 +94,7 @@ public:
/**
* Checks if a gpu region is mapped by a single range of cpu addresses.
*/
- [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
+ [[nodiscard]] bool IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const;
/**
* Checks if a gpu region is mapped entirely.
@@ -123,7 +123,7 @@ public:
bool IsMemoryDirty(GPUVAddr gpu_addr, size_t size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) const;
- size_t MaxContinousRange(GPUVAddr gpu_addr, size_t size) const;
+ size_t MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const;
bool IsWithinGPUAddressRange(GPUVAddr gpu_addr) const {
return gpu_addr < address_space_size;
@@ -158,8 +158,8 @@ private:
}
}
- inline bool IsBigPageContinous(size_t big_page_index) const;
- inline void SetBigPageContinous(size_t big_page_index, bool value);
+ inline bool IsBigPageContinuous(size_t big_page_index) const;
+ inline void SetBigPageContinuous(size_t big_page_index, bool value);
template <bool is_gpu_address>
void GetSubmappedRangeImpl(
@@ -213,10 +213,10 @@ private:
Common::RangeMap<GPUVAddr, PTEKind> kind_map;
Common::VirtualBuffer<u32> big_page_table_cpu;
- std::vector<u64> big_page_continous;
+ std::vector<u64> big_page_continuous;
std::vector<std::pair<VAddr, std::size_t>> page_stash{};
- static constexpr size_t continous_bits = 64;
+ static constexpr size_t continuous_bits = 64;
const size_t unique_identifier;
std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index ca52e2389..5dce51be8 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -166,7 +166,7 @@ struct FormatTuple {
{VK_FORMAT_R16G16_UINT, Attachable | Storage}, // R16G16_UINT
{VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT
{VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
- {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT
+ {VK_FORMAT_R32G32B32_SFLOAT}, // R32G32B32_FLOAT
{VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable}, // A8B8G8R8_SRGB
{VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM
{VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM
@@ -234,11 +234,6 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
PixelFormat pixel_format) {
ASSERT(static_cast<size_t>(pixel_format) < std::size(tex_format_tuples));
FormatTuple tuple = tex_format_tuples[static_cast<size_t>(pixel_format)];
- if (tuple.format == VK_FORMAT_UNDEFINED) {
- UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", pixel_format);
- return FormatInfo{VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true};
- }
-
// Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively
if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index e03685af1..b264e6ada 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -47,14 +47,15 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
Scheduler::~Scheduler() = default;
void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+ // When flushing, we only send data to the worker thread; no waiting is necessary.
SubmitExecution(signal_semaphore, wait_semaphore);
AllocateNewContext();
}
void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+ // When finishing, we need to wait for the submission to have executed on the device.
const u64 presubmit_tick = CurrentTick();
SubmitExecution(signal_semaphore, wait_semaphore);
- WaitWorker();
Wait(presubmit_tick);
AllocateNewContext();
}
@@ -63,8 +64,14 @@ void Scheduler::WaitWorker() {
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
DispatchWork();
- std::unique_lock lock{work_mutex};
- wait_cv.wait(lock, [this] { return work_queue.empty(); });
+ // Ensure the queue is drained.
+ {
+ std::unique_lock ql{queue_mutex};
+ event_cv.wait(ql, [this] { return work_queue.empty(); });
+ }
+
+ // Now wait for execution to finish.
+ std::scoped_lock el{execution_mutex};
}
void Scheduler::DispatchWork() {
@@ -72,10 +79,10 @@ void Scheduler::DispatchWork() {
return;
}
{
- std::scoped_lock lock{work_mutex};
+ std::scoped_lock ql{queue_mutex};
work_queue.push(std::move(chunk));
}
- work_cv.notify_one();
+ event_cv.notify_all();
AcquireNewChunk();
}
@@ -137,30 +144,55 @@ bool Scheduler::UpdateRescaling(bool is_rescaling) {
void Scheduler::WorkerThread(std::stop_token stop_token) {
Common::SetCurrentThreadName("VulkanWorker");
- do {
+
+ const auto TryPopQueue{[this](auto& work) -> bool {
+ if (work_queue.empty()) {
+ return false;
+ }
+
+ work = std::move(work_queue.front());
+ work_queue.pop();
+ event_cv.notify_all();
+ return true;
+ }};
+
+ while (!stop_token.stop_requested()) {
std::unique_ptr<CommandChunk> work;
- bool has_submit{false};
+
{
- std::unique_lock lock{work_mutex};
- if (work_queue.empty()) {
- wait_cv.notify_all();
- }
- Common::CondvarWait(work_cv, lock, stop_token, [&] { return !work_queue.empty(); });
+ std::unique_lock lk{queue_mutex};
+
+ // Wait for work.
+ Common::CondvarWait(event_cv, lk, stop_token, [&] { return TryPopQueue(work); });
+
+ // If we've been asked to stop, we're done.
if (stop_token.stop_requested()) {
- continue;
+ return;
}
- work = std::move(work_queue.front());
- work_queue.pop();
- has_submit = work->HasSubmit();
+ // Exchange lock ownership so that we take the execution lock before
+ // the queue lock goes out of scope. This allows us to force execution
+ // to complete in the next step.
+ std::exchange(lk, std::unique_lock{execution_mutex});
+
+ // Perform the work, tracking whether the chunk was a submission
+ // before executing.
+ const bool has_submit = work->HasSubmit();
work->ExecuteAll(current_cmdbuf);
+
+ // If the chunk was a submission, reallocate the command buffer.
+ if (has_submit) {
+ AllocateWorkerCommandBuffer();
+ }
}
- if (has_submit) {
- AllocateWorkerCommandBuffer();
+
+ {
+ std::scoped_lock rl{reserve_mutex};
+
+ // Recycle the chunk back to the reserve.
+ chunk_reserve.emplace_back(std::move(work));
}
- std::scoped_lock reserve_lock{reserve_mutex};
- chunk_reserve.push_back(std::move(work));
- } while (!stop_token.stop_requested());
+ }
}
void Scheduler::AllocateWorkerCommandBuffer() {
@@ -289,13 +321,16 @@ void Scheduler::EndRenderPass() {
}
void Scheduler::AcquireNewChunk() {
- std::scoped_lock lock{reserve_mutex};
+ std::scoped_lock rl{reserve_mutex};
+
if (chunk_reserve.empty()) {
+ // If we don't have anything reserved, we need to make a new chunk.
chunk = std::make_unique<CommandChunk>();
- return;
+ } else {
+ // Otherwise, we can just take from the reserve.
+ chunk = std::move(chunk_reserve.back());
+ chunk_reserve.pop_back();
}
- chunk = std::move(chunk_reserve.back());
- chunk_reserve.pop_back();
}
} // namespace Vulkan
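Reviewer note: the scheduler rework replaces work_mutex/work_cv/wait_cv with queue_mutex, execution_mutex, and a single event_cv. The subtle part is WaitWorker: an empty queue only proves the worker has *taken* the last chunk, not finished executing it. The worker therefore holds execution_mutex for the whole time it runs a chunk (via the std::exchange lock handoff above), and WaitWorker acquires that mutex after draining the queue; this is also why Finish no longer needs an explicit WaitWorker, since Wait(presubmit_tick) already covers device-side completion. A reduced sketch of the pattern (types simplified; the real queue holds std::unique_ptr<CommandChunk>):

```cpp
#include <condition_variable>
#include <mutex>
#include <queue>

struct WorkerSync {
    std::mutex queue_mutex;     // guards work_queue
    std::mutex execution_mutex; // held by the worker while executing a chunk
    std::condition_variable_any event_cv;
    std::queue<int> work_queue; // stand-in for the chunk queue

    void WaitWorker() {
        // 1. Wait until the worker has popped everything that was pushed.
        {
            std::unique_lock ql{queue_mutex};
            event_cv.wait(ql, [this] { return work_queue.empty(); });
        }
        // 2. An empty queue only means the last chunk was *taken*; acquiring
        //    execution_mutex blocks until the worker has finished running it.
        std::scoped_lock el{execution_mutex};
    }
};
```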
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index bd4cb0f7e..8d75ce987 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -232,10 +232,10 @@ private:
std::queue<std::unique_ptr<CommandChunk>> work_queue;
std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
+ std::mutex execution_mutex;
std::mutex reserve_mutex;
- std::mutex work_mutex;
- std::condition_variable_any work_cv;
- std::condition_variable wait_cv;
+ std::mutex queue_mutex;
+ std::condition_variable_any event_cv;
std::jthread worker_thread;
};
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 08aa8ca33..5fc2b2fec 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -42,15 +42,15 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
ComponentType blue, ComponentType alpha,
bool is_srgb) noexcept {
switch (Hash(format, red, green, blue, alpha, is_srgb)) {
- case Hash(TextureFormat::A8R8G8B8, UNORM):
+ case Hash(TextureFormat::A8B8G8R8, UNORM):
return PixelFormat::A8B8G8R8_UNORM;
- case Hash(TextureFormat::A8R8G8B8, SNORM):
+ case Hash(TextureFormat::A8B8G8R8, SNORM):
return PixelFormat::A8B8G8R8_SNORM;
- case Hash(TextureFormat::A8R8G8B8, UINT):
+ case Hash(TextureFormat::A8B8G8R8, UINT):
return PixelFormat::A8B8G8R8_UINT;
- case Hash(TextureFormat::A8R8G8B8, SINT):
+ case Hash(TextureFormat::A8B8G8R8, SINT):
return PixelFormat::A8B8G8R8_SINT;
- case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB):
+ case Hash(TextureFormat::A8B8G8R8, UNORM, SRGB):
return PixelFormat::A8B8G8R8_SRGB;
case Hash(TextureFormat::B5G6R5, UNORM):
return PixelFormat::B5G6R5_UNORM;
@@ -74,13 +74,13 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
return PixelFormat::R8_UINT;
case Hash(TextureFormat::R8, SINT):
return PixelFormat::R8_SINT;
- case Hash(TextureFormat::R8G8, UNORM):
+ case Hash(TextureFormat::G8R8, UNORM):
return PixelFormat::R8G8_UNORM;
- case Hash(TextureFormat::R8G8, SNORM):
+ case Hash(TextureFormat::G8R8, SNORM):
return PixelFormat::R8G8_SNORM;
- case Hash(TextureFormat::R8G8, UINT):
+ case Hash(TextureFormat::G8R8, UINT):
return PixelFormat::R8G8_UINT;
- case Hash(TextureFormat::R8G8, SINT):
+ case Hash(TextureFormat::G8R8, SINT):
return PixelFormat::R8G8_SINT;
case Hash(TextureFormat::R16G16B16A16, FLOAT):
return PixelFormat::R16G16B16A16_FLOAT;
@@ -136,49 +136,49 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
return PixelFormat::R32_SINT;
case Hash(TextureFormat::E5B9G9R9, FLOAT):
return PixelFormat::E5B9G9R9_FLOAT;
- case Hash(TextureFormat::D32, FLOAT):
+ case Hash(TextureFormat::Z32, FLOAT):
return PixelFormat::D32_FLOAT;
- case Hash(TextureFormat::D16, UNORM):
+ case Hash(TextureFormat::Z16, UNORM):
return PixelFormat::D16_UNORM;
- case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR):
+ case Hash(TextureFormat::Z24S8, UINT, UNORM, UNORM, UNORM, LINEAR):
return PixelFormat::S8_UINT_D24_UNORM;
- case Hash(TextureFormat::S8D24, UINT, UNORM, UINT, UINT, LINEAR):
+ case Hash(TextureFormat::Z24S8, UINT, UNORM, UINT, UINT, LINEAR):
return PixelFormat::S8_UINT_D24_UNORM;
- case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):
+ case Hash(TextureFormat::G24R8, UINT, UNORM, UNORM, UNORM, LINEAR):
return PixelFormat::S8_UINT_D24_UNORM;
- case Hash(TextureFormat::D24S8, UNORM, UINT, UINT, UINT, LINEAR):
+ case Hash(TextureFormat::S8Z24, UNORM, UINT, UINT, UINT, LINEAR):
return PixelFormat::D24_UNORM_S8_UINT;
- case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
+ case Hash(TextureFormat::Z32_X24S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
return PixelFormat::D32_FLOAT_S8_UINT;
- case Hash(TextureFormat::R32_B24G8, FLOAT, UINT, UNORM, UNORM, LINEAR):
+ case Hash(TextureFormat::R32B24G8, FLOAT, UINT, UNORM, UNORM, LINEAR):
return PixelFormat::D32_FLOAT_S8_UINT;
- case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR):
+ case Hash(TextureFormat::DXT1, UNORM, LINEAR):
return PixelFormat::BC1_RGBA_UNORM;
- case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB):
+ case Hash(TextureFormat::DXT1, UNORM, SRGB):
return PixelFormat::BC1_RGBA_SRGB;
- case Hash(TextureFormat::BC2, UNORM, LINEAR):
+ case Hash(TextureFormat::DXT23, UNORM, LINEAR):
return PixelFormat::BC2_UNORM;
- case Hash(TextureFormat::BC2, UNORM, SRGB):
+ case Hash(TextureFormat::DXT23, UNORM, SRGB):
return PixelFormat::BC2_SRGB;
- case Hash(TextureFormat::BC3, UNORM, LINEAR):
+ case Hash(TextureFormat::DXT45, UNORM, LINEAR):
return PixelFormat::BC3_UNORM;
- case Hash(TextureFormat::BC3, UNORM, SRGB):
+ case Hash(TextureFormat::DXT45, UNORM, SRGB):
return PixelFormat::BC3_SRGB;
- case Hash(TextureFormat::BC4, UNORM):
+ case Hash(TextureFormat::DXN1, UNORM):
return PixelFormat::BC4_UNORM;
- case Hash(TextureFormat::BC4, SNORM):
+ case Hash(TextureFormat::DXN1, SNORM):
return PixelFormat::BC4_SNORM;
- case Hash(TextureFormat::BC5, UNORM):
+ case Hash(TextureFormat::DXN2, UNORM):
return PixelFormat::BC5_UNORM;
- case Hash(TextureFormat::BC5, SNORM):
+ case Hash(TextureFormat::DXN2, SNORM):
return PixelFormat::BC5_SNORM;
- case Hash(TextureFormat::BC7, UNORM, LINEAR):
+ case Hash(TextureFormat::BC7U, UNORM, LINEAR):
return PixelFormat::BC7_UNORM;
- case Hash(TextureFormat::BC7, UNORM, SRGB):
+ case Hash(TextureFormat::BC7U, UNORM, SRGB):
return PixelFormat::BC7_SRGB;
- case Hash(TextureFormat::BC6H_SFLOAT, FLOAT):
+ case Hash(TextureFormat::BC6H_S16, FLOAT):
return PixelFormat::BC6H_SFLOAT;
- case Hash(TextureFormat::BC6H_UFLOAT, FLOAT):
+ case Hash(TextureFormat::BC6H_U16, FLOAT):
return PixelFormat::BC6H_UFLOAT;
case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR):
return PixelFormat::ASTC_2D_4X4_UNORM;
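Reviewer note: this switch compiles because Hash() folds the texture format, the four per-component types, and the sRGB flag into a single constexpr integer, making every case label a compile-time constant; the renames above (A8R8G8B8 → A8B8G8R8, D24S8 → S8Z24, BC1_RGBA → DXT1, ...) only touch the enumerator names fed into it. A plausible reconstruction of that packing; the field widths and enumerator values below are assumptions based on the Maxwell texture header, and the real helper is defined earlier in format_lookup_table.cpp:

```cpp
#include <cstdint>

enum class TextureFormat : std::uint32_t { A8B8G8R8 = 0x08, Z32 = 0x2f };
enum class ComponentType : std::uint32_t { SNORM = 1, UNORM = 2, SINT = 3, UINT = 4, FLOAT = 7 };

constexpr std::uint32_t Hash(TextureFormat format, ComponentType red, ComponentType green,
                             ComponentType blue, ComponentType alpha, bool is_srgb) {
    std::uint32_t hash = is_srgb ? 1U : 0U;
    hash |= static_cast<std::uint32_t>(red) << 1;   // 3 bits per component type
    hash |= static_cast<std::uint32_t>(green) << 4;
    hash |= static_cast<std::uint32_t>(blue) << 7;
    hash |= static_cast<std::uint32_t>(alpha) << 10;
    hash |= static_cast<std::uint32_t>(format) << 13; // format id in the high bits
    return hash;
}

// Every case label is a distinct constant, so the switch is valid:
static_assert(Hash(TextureFormat::Z32, ComponentType::FLOAT, ComponentType::FLOAT,
                   ComponentType::FLOAT, ComponentType::FLOAT, false) != 0);
```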
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c09eecd1a..ed5c768d8 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1176,13 +1176,13 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
const bool broken_views = runtime.HasBrokenTextureViewFormats();
const bool native_bgr = runtime.HasNativeBgr();
- std::vector<ImageId> overlap_ids;
+ boost::container::small_vector<ImageId, 4> overlap_ids;
std::unordered_set<ImageId> overlaps_found;
- std::vector<ImageId> left_aliased_ids;
- std::vector<ImageId> right_aliased_ids;
+ boost::container::small_vector<ImageId, 4> left_aliased_ids;
+ boost::container::small_vector<ImageId, 4> right_aliased_ids;
std::unordered_set<ImageId> ignore_textures;
- std::vector<ImageId> bad_overlap_ids;
- std::vector<ImageId> all_siblings;
+ boost::container::small_vector<ImageId, 4> bad_overlap_ids;
+ boost::container::small_vector<ImageId, 4> all_siblings;
const bool this_is_linear = info.type == ImageType::Linear;
const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
if (True(overlap.flags & ImageFlagBits::Remapped)) {
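Reviewer note: the small_vector swap above keeps the first four overlap IDs in inline storage, avoiding a heap allocation on the hot JoinImages path, where overlap lists are almost always tiny. A minimal demonstration of the container's behavior:

```cpp
#include <boost/container/small_vector.hpp>

int main() {
    boost::container::small_vector<int, 4> ids;
    ids.push_back(1); // stored inline, no allocation
    ids.push_back(2);
    ids.push_back(3);
    ids.push_back(4);
    ids.push_back(5); // fifth element spills to the heap, like std::vector
    return static_cast<int>(ids.size());
}
```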
@@ -1269,7 +1269,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
Image& new_image = slot_images[new_image_id];
- if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
+ if (!gpu_memory->IsContinuousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
new_image.flags |= ImageFlagBits::Sparse;
}
@@ -1298,16 +1298,16 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
Image& overlap = slot_images[overlap_id];
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
new_image.flags |= ImageFlagBits::GpuModified;
- }
- const auto& resolution = Settings::values.resolution_info;
- const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
- const u32 up_scale = can_rescale ? resolution.up_scale : 1;
- const u32 down_shift = can_rescale ? resolution.down_shift : 0;
- auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
- if (overlap.info.num_samples != new_image.info.num_samples) {
- runtime.CopyImageMSAA(new_image, overlap, std::move(copies));
- } else {
- runtime.CopyImage(new_image, overlap, std::move(copies));
+ const auto& resolution = Settings::values.resolution_info;
+ const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
+ const u32 up_scale = can_rescale ? resolution.up_scale : 1;
+ const u32 down_shift = can_rescale ? resolution.down_shift : 0;
+ auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
+ if (overlap.info.num_samples != new_image.info.num_samples) {
+ runtime.CopyImageMSAA(new_image, overlap, std::move(copies));
+ } else {
+ runtime.CopyImage(new_image, overlap, std::move(copies));
+ }
}
if (True(overlap.flags & ImageFlagBits::Tracked)) {
UntrackImage(overlap, overlap_id);
@@ -1616,37 +1616,38 @@ void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s
return;
}
auto& gpu_page_table = gpu_page_table_storage[*storage_id];
- ForEachGPUPage(gpu_addr, size, [this, gpu_page_table, &images, gpu_addr, size, func](u64 page) {
- const auto it = gpu_page_table.find(page);
- if (it == gpu_page_table.end()) {
- if constexpr (BOOL_BREAK) {
- return false;
- } else {
- return;
- }
- }
- for (const ImageId image_id : it->second) {
- Image& image = slot_images[image_id];
- if (True(image.flags & ImageFlagBits::Picked)) {
- continue;
- }
- if (!image.OverlapsGPU(gpu_addr, size)) {
- continue;
- }
- image.flags |= ImageFlagBits::Picked;
- images.push_back(image_id);
- if constexpr (BOOL_BREAK) {
- if (func(image_id, image)) {
- return true;
- }
- } else {
- func(image_id, image);
- }
- }
- if constexpr (BOOL_BREAK) {
- return false;
- }
- });
+ ForEachGPUPage(gpu_addr, size,
+ [this, &gpu_page_table, &images, gpu_addr, size, func](u64 page) {
+ const auto it = gpu_page_table.find(page);
+ if (it == gpu_page_table.end()) {
+ if constexpr (BOOL_BREAK) {
+ return false;
+ } else {
+ return;
+ }
+ }
+ for (const ImageId image_id : it->second) {
+ Image& image = slot_images[image_id];
+ if (True(image.flags & ImageFlagBits::Picked)) {
+ continue;
+ }
+ if (!image.OverlapsGPU(gpu_addr, size)) {
+ continue;
+ }
+ image.flags |= ImageFlagBits::Picked;
+ images.push_back(image_id);
+ if constexpr (BOOL_BREAK) {
+ if (func(image_id, image)) {
+ return true;
+ }
+ } else {
+ func(image_id, image);
+ }
+ }
+ if constexpr (BOOL_BREAK) {
+ return false;
+ }
+ });
for (const ImageId image_id : images) {
slot_images[image_id].flags &= ~ImageFlagBits::Picked;
}
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index 26649aebf..4a80a59f9 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -14,7 +14,7 @@ namespace Tegra::Texture {
namespace {
-constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
+[[maybe_unused]] constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f,
0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f,
0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f,
@@ -52,11 +52,13 @@ constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
} // Anonymous namespace
std::array<float, 4> TSCEntry::BorderColor() const noexcept {
- if (!srgb_conversion) {
- return border_color;
- }
- return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g],
- SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
+ // TODO: Handle SRGB correctly. Using this breaks shadows in some games (Xenoblade).
+ // if (!srgb_conversion) {
+ // return border_color;
+ //}
+ // return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g],
+ // SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
+ return border_color;
}
float TSCEntry::MaxAnisotropy() const noexcept {
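Reviewer note: the disabled path decoded 8-bit sRGB border-color components to linear through the 256-entry LUT above, which is now kept only under [[maybe_unused]] until the Xenoblade shadow regression is understood. For reference, a generator for such a decode table using the standard sRGB EOTF; whether the committed table encodes exactly this curve is not verified here, so treat it as an illustration:

```cpp
#include <array>
#include <cmath>

// Builds a table mapping an 8-bit sRGB-encoded value to linear [0, 1].
std::array<float, 256> MakeSrgbToLinearLut() {
    std::array<float, 256> lut{};
    for (int i = 0; i < 256; ++i) {
        const float c = static_cast<float>(i) / 255.0f;
        // Piecewise sRGB EOTF: linear segment near black, power curve above.
        lut[i] = c <= 0.04045f ? c / 12.92f
                               : std::pow((c + 0.055f) / 1.055f, 2.4f);
    }
    return lut;
}
```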
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 7c4553a53..7e5837b20 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -15,26 +15,26 @@ enum class TextureFormat : u32 {
R32G32B32 = 0x02,
R16G16B16A16 = 0x03,
R32G32 = 0x04,
- R32_B24G8 = 0x05,
+ R32B24G8 = 0x05,
ETC2_RGB = 0x06,
X8B8G8R8 = 0x07,
- A8R8G8B8 = 0x08,
+ A8B8G8R8 = 0x08,
A2B10G10R10 = 0x09,
ETC2_RGB_PTA = 0x0a,
ETC2_RGBA = 0x0b,
R16G16 = 0x0c,
- R24G8 = 0x0d,
- R8G24 = 0x0e,
+ G8R24 = 0x0d,
+ G24R8 = 0x0e,
R32 = 0x0f,
- BC6H_SFLOAT = 0x10,
- BC6H_UFLOAT = 0x11,
+ BC6H_S16 = 0x10,
+ BC6H_U16 = 0x11,
A4B4G4R4 = 0x12,
A5B5G5R1 = 0x13,
A1B5G5R5 = 0x14,
B5G6R5 = 0x15,
B6G5R5 = 0x16,
- BC7 = 0x17,
- R8G8 = 0x18,
+ BC7U = 0x17,
+ G8R8 = 0x18,
EAC = 0x19,
EACX2 = 0x1a,
R16 = 0x1b,
@@ -46,33 +46,33 @@ enum class TextureFormat : u32 {
B10G11R11 = 0x21,
G8B8G8R8 = 0x22,
B8G8R8G8 = 0x23,
- BC1_RGBA = 0x24,
- BC2 = 0x25,
- BC3 = 0x26,
- BC4 = 0x27,
- BC5 = 0x28,
- S8D24 = 0x29,
- X8D24 = 0x2a,
- D24S8 = 0x2b,
- X4V4D24__COV4R4V = 0x2c,
- X4V4D24__COV8R8V = 0x2d,
- V8D24__COV4R12V = 0x2e,
- D32 = 0x2f,
- D32S8 = 0x30,
- X8D24_X20V4S8__COV4R4V = 0x31,
- X8D24_X20V4S8__COV8R8V = 0x32,
- D32_X20V4X8__COV4R4V = 0x33,
- D32_X20V4X8__COV8R8V = 0x34,
- D32_X20V4S8__COV4R4V = 0x35,
- D32_X20V4S8__COV8R8V = 0x36,
- X8D24_X16V8S8__COV4R12V = 0x37,
- D32_X16V8X8__COV4R12V = 0x38,
- D32_X16V8S8__COV4R12V = 0x39,
- D16 = 0x3a,
- V8D24__COV8R24V = 0x3b,
- X8D24_X16V8S8__COV8R24V = 0x3c,
- D32_X16V8X8__COV8R24V = 0x3d,
- D32_X16V8S8__COV8R24V = 0x3e,
+ DXT1 = 0x24,
+ DXT23 = 0x25,
+ DXT45 = 0x26,
+ DXN1 = 0x27,
+ DXN2 = 0x28,
+ Z24S8 = 0x29,
+ X8Z24 = 0x2a,
+ S8Z24 = 0x2b,
+ X4V4Z24__COV4R4V = 0x2c,
+ X4V4Z24__COV8R8V = 0x2d,
+ V8Z24__COV4R12V = 0x2e,
+ Z32 = 0x2f,
+ Z32_X24S8 = 0x30,
+ X8Z24_X20V4S8__COV4R4V = 0x31,
+ X8Z24_X20V4S8__COV8R8V = 0x32,
+ Z32_X20V4X8__COV4R4V = 0x33,
+ Z32_X20V4X8__COV8R8V = 0x34,
+ Z32_X20V4S8__COV4R4V = 0x35,
+ Z32_X20V4S8__COV8R8V = 0x36,
+ X8Z24_X16V8S8__COV4R12V = 0x37,
+ Z32_X16V8X8__COV4R12V = 0x38,
+ Z32_X16V8S8__COV4R12V = 0x39,
+ Z16 = 0x3a,
+ V8Z24__COV8R24V = 0x3b,
+ X8Z24_X16V8S8__COV8R24V = 0x3c,
+ Z32_X16V8X8__COV8R24V = 0x3d,
+ Z32_X16V8S8__COV8R24V = 0x3e,
ASTC_2D_4X4 = 0x40,
ASTC_2D_5X5 = 0x41,
ASTC_2D_6X6 = 0x42,
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index fedb4a7bb..b42d48416 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -18,7 +18,7 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
std::unique_ptr<Core::Frontend::GraphicsContext> context) {
auto& telemetry_session = system.TelemetrySession();
- auto& cpu_memory = system.Memory();
+ auto& cpu_memory = system.ApplicationMemory();
switch (Settings::values.renderer_backend.GetValue()) {
case Settings::RendererBackend::OpenGL:
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index df348af55..6f288b3f8 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -401,6 +401,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
}
}
+ if (extensions.extended_dynamic_state3 && is_radv) {
+ LOG_WARNING(Render_Vulkan, "RADV has broken extendedDynamicState3ColorBlendEquation");
+ features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
+ features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
+ dynamic_state3_blending = false;
+ }
if (extensions.vertex_input_dynamic_state && is_radv) {
// TODO(ameerj): Blacklist only offending driver versions
// TODO(ameerj): Confirm if RDNA1 is affected
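Reviewer note: the new RADV workaround follows the same shape as the surrounding special cases: leave the extension advertised but mask the broken feature bits before the feature struct is chained into VkDeviceCreateInfo. A stand-alone illustration using the real VK_EXT_extended_dynamic_state3 feature struct (the helper name is hypothetical):

```cpp
#include <vulkan/vulkan.h>

// Masks the feature bits that are broken on RADV before device creation;
// the struct is later chained into VkDeviceCreateInfo::pNext as usual.
void MaskBrokenRadvBlendState(VkPhysicalDeviceExtendedDynamicState3FeaturesEXT& features,
                              bool is_radv) {
    if (is_radv) {
        features.extendedDynamicState3ColorBlendEnable = VK_FALSE;
        features.extendedDynamicState3ColorBlendEquation = VK_FALSE;
    }
}
```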
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 486d4dfaf..336f53700 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -375,6 +375,8 @@ const char* ToString(VkResult result) noexcept {
return "VK_RESULT_MAX_ENUM";
case VkResult::VK_ERROR_COMPRESSION_EXHAUSTED_EXT:
return "VK_ERROR_COMPRESSION_EXHAUSTED_EXT";
+ case VkResult::VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT:
+ return "VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT";
}
return "Unknown";
}