summary refs log tree commit diff stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/buffer_cache/buffer_base.h3
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h71
-rw-r--r--src/video_core/cdma_pusher.cpp2
-rw-r--r--src/video_core/cdma_pusher.h1
-rw-r--r--src/video_core/command_classes/codecs/codec.cpp36
-rw-r--r--src/video_core/command_classes/codecs/codec.h2
-rw-r--r--src/video_core/command_classes/codecs/vp8.cpp1
-rw-r--r--src/video_core/command_classes/codecs/vp9_types.h1
-rw-r--r--src/video_core/command_classes/host1x.h2
-rw-r--r--src/video_core/dma_pusher.cpp1
-rw-r--r--src/video_core/engines/fermi_2d.h1
-rw-r--r--src/video_core/engines/kepler_compute.cpp1
-rw-r--r--src/video_core/engines/kepler_compute.h1
-rw-r--r--src/video_core/engines/kepler_memory.cpp2
-rw-r--r--src/video_core/engines/kepler_memory.h2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp114
-rw-r--r--src/video_core/engines/maxwell_3d.h56
-rw-r--r--src/video_core/engines/maxwell_dma.cpp20
-rw-r--r--src/video_core/engines/maxwell_dma.h4
-rw-r--r--src/video_core/fence_manager.h2
-rw-r--r--src/video_core/framebuffer_config.h29
-rw-r--r--src/video_core/gpu.cpp6
-rw-r--r--src/video_core/gpu.h1
-rw-r--r--src/video_core/gpu_thread.cpp2
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt2
-rw-r--r--src/video_core/host_shaders/convert_s8d24_to_abgr8.frag23
-rw-r--r--src/video_core/host_shaders/opengl_convert_s8d24.comp18
-rw-r--r--src/video_core/memory_manager.cpp1
-rw-r--r--src/video_core/query_cache.h2
-rw-r--r--src/video_core/renderer_base.cpp5
-rw-r--r--src/video_core/renderer_base.h3
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp14
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h14
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.h1
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp13
-rw-r--r--src/video_core/renderer_opengl/gl_device.h7
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp46
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.h8
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp19
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h10
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h4
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h1
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h1
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp57
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h16
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h2
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp18
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h8
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp24
-rw-r--r--src/video_core/renderer_opengl/util_shaders.h3
-rw-r--r--src/video_core/renderer_vulkan/blit_image.cpp16
-rw-r--r--src/video_core/renderer_vulkan/blit_image.h6
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp3
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp2
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h1
-rw-r--r--src/video_core/renderer_vulkan/pipeline_helper.h2
-rw-r--r--src/video_core/renderer_vulkan/pipeline_statistics.cpp4
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp3
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp17
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp12
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_render_pass_cache.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_util.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp69
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.h1
-rw-r--r--src/video_core/shader_cache.cpp2
-rw-r--r--src/video_core/shader_environment.cpp1
-rw-r--r--src/video_core/shader_notify.cpp1
-rw-r--r--src/video_core/shader_notify.h1
-rw-r--r--src/video_core/surface.cpp8
-rw-r--r--src/video_core/surface.h10
-rw-r--r--src/video_core/texture_cache/descriptor_table.h1
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp6
-rw-r--r--src/video_core/texture_cache/formatter.h4
-rw-r--r--src/video_core/texture_cache/image_base.h7
-rw-r--r--src/video_core/texture_cache/render_targets.h1
-rw-r--r--src/video_core/texture_cache/slot_vector.h2
-rw-r--r--src/video_core/texture_cache/texture_cache.h82
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h8
-rw-r--r--src/video_core/texture_cache/util.h2
-rw-r--r--src/video_core/textures/astc.cpp1
-rw-r--r--src/video_core/textures/astc.h3
-rw-r--r--src/video_core/textures/decoders.cpp2
-rw-r--r--src/video_core/textures/texture.cpp1
-rw-r--r--src/video_core/video_core.cpp1
-rw-r--r--src/video_core/vulkan_common/nsight_aftermath_tracker.h14
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp71
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h12
-rw-r--r--src/video_core/vulkan_common/vulkan_instance.cpp2
-rw-r--r--src/video_core/vulkan_common/vulkan_library.cpp1
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.cpp2
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.h1
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.cpp15
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.h5
118 files changed, 757 insertions, 406 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index be2113f5a..10975884b 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -212,7 +212,7 @@ public:
void FlushCachedWrites() noexcept {
flags &= ~BufferFlagBits::CachedWrites;
const u64 num_words = NumWords();
- const u64* const cached_words = Array<Type::CachedCPU>();
+ u64* const cached_words = Array<Type::CachedCPU>();
u64* const untracked_words = Array<Type::Untracked>();
u64* const cpu_words = Array<Type::CPU>();
for (u64 word_index = 0; word_index < num_words; ++word_index) {
@@ -220,6 +220,7 @@ public:
NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
untracked_words[word_index] |= cached_bits;
cpu_words[word_index] |= cached_bits;
+ cached_words[word_index] = 0;
}
}
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index fa26eb8b0..3f2bf6294 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -11,7 +11,6 @@
#include <mutex>
#include <numeric>
#include <span>
-#include <unordered_map>
#include <vector>
#include <boost/container/small_vector.hpp>
@@ -22,7 +21,6 @@
#include "common/literals.h"
#include "common/lru_cache.h"
#include "common/microprofile.h"
-#include "common/scope_exit.h"
#include "common/settings.h"
#include "core/memory.h"
#include "video_core/buffer_cache/buffer_base.h"
@@ -78,8 +76,9 @@ class BufferCache {
static constexpr BufferId NULL_BUFFER_ID{0};
- static constexpr u64 EXPECTED_MEMORY = 512_MiB;
- static constexpr u64 CRITICAL_MEMORY = 1_GiB;
+ static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
+ static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
+ static constexpr s64 TARGET_THRESHOLD = 4_GiB;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -438,6 +437,8 @@ private:
Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
u64 frame_tick = 0;
u64 total_used_memory = 0;
+ u64 minimum_memory = 0;
+ u64 critical_memory = 0;
std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
};
@@ -453,11 +454,30 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
// Ensure the first slot is used for the null buffer
void(slot_buffers.insert(runtime, NullBufferParams{}));
common_ranges.clear();
+
+ if (!runtime.CanReportMemoryUsage()) {
+ minimum_memory = DEFAULT_EXPECTED_MEMORY;
+ critical_memory = DEFAULT_CRITICAL_MEMORY;
+ return;
+ }
+
+ const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
+ const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB;
+ const s64 min_spacing_critical = device_memory - 1_GiB;
+ const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
+ const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
+ const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
+ minimum_memory = static_cast<u64>(
+ std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
+ DEFAULT_EXPECTED_MEMORY));
+ critical_memory = static_cast<u64>(
+ std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
+ DEFAULT_CRITICAL_MEMORY));
}
template <class P>
void BufferCache<P>::RunGarbageCollector() {
- const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;
+ const bool aggressive_gc = total_used_memory >= critical_memory;
const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
int num_iterations = aggressive_gc ? 64 : 32;
const auto clean_up = [this, &num_iterations](BufferId buffer_id) {
@@ -488,7 +508,11 @@ void BufferCache<P>::TickFrame() {
const bool skip_preferred = hits * 256 < shots * 251;
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
- if (total_used_memory >= EXPECTED_MEMORY) {
+ // If we can obtain the memory info, use it instead of the estimate.
+ if (runtime.CanReportMemoryUsage()) {
+ total_used_memory = runtime.GetDeviceMemoryUsage();
+ }
+ if (total_used_memory >= minimum_memory) {
RunGarbageCollector();
}
++frame_tick;
@@ -1287,7 +1311,20 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
const GPUVAddr gpu_addr_begin = array.StartAddress();
const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
- const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+ u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+ if (address_size >= 64_MiB) {
+ // Reported vertex buffer size is very large, cap to mapped buffer size
+ GPUVAddr submapped_addr_end = gpu_addr_begin;
+
+ const auto ranges{gpu_memory.GetSubmappedRange(gpu_addr_begin, address_size)};
+ if (ranges.size() > 0) {
+ const auto& [addr, size] = *ranges.begin();
+ submapped_addr_end = addr + size;
+ }
+
+ address_size =
+ std::min(address_size, static_cast<u32>(submapped_addr_end - gpu_addr_begin));
+ }
const u32 size = address_size; // TODO: Analyze stride and number of vertices
if (array.enable == 0 || size == 0 || !cpu_addr) {
vertex_buffers[index] = NULL_BINDING;
@@ -1469,19 +1506,27 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
overlap_ids.push_back(overlap_id);
overlap.Pick();
const VAddr overlap_cpu_addr = overlap.CpuAddr();
- if (overlap_cpu_addr < begin) {
+ const bool expands_left = overlap_cpu_addr < begin;
+ if (expands_left) {
cpu_addr = begin = overlap_cpu_addr;
}
- end = std::max(end, overlap_cpu_addr + overlap.SizeBytes());
-
+ const VAddr overlap_end = overlap_cpu_addr + overlap.SizeBytes();
+ const bool expands_right = overlap_end > end;
+ if (overlap_end > end) {
+ end = overlap_end;
+ }
stream_score += overlap.StreamScore();
if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) {
// When this memory region has been joined a bunch of times, we assume it's being used
// as a stream buffer. Increase the size to skip constantly recreating buffers.
has_stream_leap = true;
- begin -= PAGE_SIZE * 256;
- cpu_addr = begin;
- end += PAGE_SIZE * 256;
+ if (expands_right) {
+ begin -= PAGE_SIZE * 256;
+ cpu_addr = begin;
+ }
+ if (expands_left) {
+ end += PAGE_SIZE * 256;
+ }
}
}
return OverlapResult{
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index a8c4b4415..8dd840558 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -23,11 +23,9 @@
#include "command_classes/nvdec.h"
#include "command_classes/vic.h"
#include "video_core/cdma_pusher.h"
-#include "video_core/command_classes/nvdec_common.h"
#include "video_core/command_classes/sync_manager.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
namespace Tegra {
CDmaPusher::CDmaPusher(GPU& gpu_)
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 87b49d6ea..cb1d16b71 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -8,6 +8,7 @@
#include <vector>
#include "common/bit_field.h"
+#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Tegra {
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index 04d0f3a2f..40f7755e8 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include <algorithm>
-#include <cstdio>
#include <fstream>
#include <vector>
#include "common/assert.h"
@@ -57,6 +56,18 @@ AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pi
av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT;
return PREFERRED_CPU_FMT;
}
+
+// Collects the hardware device types (hwcontexts) reported by ffmpeg's av_hwdevice_iterate_types
+std::vector<AVHWDeviceType> ListSupportedContexts() {
+ std::vector<AVHWDeviceType> contexts{};
+ AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; // Iteration is seeded with NONE
+ do {
+ current_device_type = av_hwdevice_iterate_types(current_device_type);
+ contexts.push_back(current_device_type); // Pushed before the loop test, so the final NONE is stored too
+ } while (current_device_type != AV_HWDEVICE_TYPE_NONE);
+ return contexts;
+}
+
} // namespace
void AVFrameDeleter(AVFrame* ptr) {
@@ -77,17 +88,6 @@ Codec::~Codec() {
av_buffer_unref(&av_gpu_decoder);
}
-// List all the currently available hwcontext in ffmpeg
-static std::vector<AVHWDeviceType> ListSupportedContexts() {
- std::vector<AVHWDeviceType> contexts{};
- AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
- do {
- current_device_type = av_hwdevice_iterate_types(current_device_type);
- contexts.push_back(current_device_type);
- } while (current_device_type != AV_HWDEVICE_TYPE_NONE);
- return contexts;
-}
-
bool Codec::CreateGpuAvDevice() {
static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
static const auto supported_contexts = ListSupportedContexts();
@@ -97,6 +97,8 @@ bool Codec::CreateGpuAvDevice() {
LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
continue;
}
+ // Avoid memory leak from not cleaning up after av_hwdevice_ctx_create
+ av_buffer_unref(&av_gpu_decoder);
const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
if (hwdevice_res < 0) {
LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
@@ -128,15 +130,19 @@ bool Codec::CreateGpuAvDevice() {
av_codec->name, av_hwdevice_get_type_name(type));
break;
}
- if (config->methods & HW_CONFIG_METHOD && config->device_type == type) {
- av_codec_ctx->pix_fmt = config->pix_fmt;
- if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) {
+ if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) {
+#if defined(__unix__)
+ // Some linux decoding backends are reported to crash with this config method
+ // TODO(ameerj): Properly support this method
+ if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) != 0) {
// skip zero-copy decoders, we don't currently support them
LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.",
av_hwdevice_get_type_name(type), config->methods);
continue;
}
+#endif
LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
+ av_codec_ctx->pix_fmt = config->pix_fmt;
return true;
}
}
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index de5672155..661673b4e 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -7,7 +7,7 @@
#include <memory>
#include <string_view>
#include <queue>
-#include "common/common_types.h"
+
#include "video_core/command_classes/nvdec_common.h"
extern "C" {
diff --git a/src/video_core/command_classes/codecs/vp8.cpp b/src/video_core/command_classes/codecs/vp8.cpp
index 32ad0ec16..2f280cb7c 100644
--- a/src/video_core/command_classes/codecs/vp8.cpp
+++ b/src/video_core/command_classes/codecs/vp8.cpp
@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <array>
#include <vector>
#include "video_core/command_classes/codecs/vp8.h"
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
index 3b1ed4b3a..af1290016 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -5,7 +5,6 @@
#pragma once
#include <array>
-#include <cstring>
#include <vector>
#include "common/common_funcs.h"
#include "common/common_types.h"
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h
index 7e94799dd..736d2fd0c 100644
--- a/src/video_core/command_classes/host1x.h
+++ b/src/video_core/command_classes/host1x.h
@@ -4,8 +4,6 @@
#pragma once
-#include <vector>
-#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Tegra {
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 8d28bd884..0b51e402a 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -6,7 +6,6 @@
#include "common/microprofile.h"
#include "common/settings.h"
#include "core/core.h"
-#include "core/memory.h"
#include "video_core/dma_pusher.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index d76c5ed56..4c0568c4c 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -9,7 +9,6 @@
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
-#include "common/math_util.h"
#include "video_core/engines/engine_interface.h"
#include "video_core/gpu.h"
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 5a1c12076..f7ff92c57 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -10,7 +10,6 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
-#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index f8b8d06ac..c6b8adb56 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -12,7 +12,6 @@
#include "common/common_types.h"
#include "video_core/engines/engine_interface.h"
#include "video_core/engines/engine_upload.h"
-#include "video_core/gpu.h"
#include "video_core/textures/texture.h"
namespace Core {
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 8aed16caa..f930e02b6 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -9,8 +9,6 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
-#include "video_core/renderer_base.h"
-#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 949e2fae1..4cb4a3d2d 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -6,13 +6,11 @@
#include <array>
#include <cstddef>
-#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/engines/engine_interface.h"
#include "video_core/engines/engine_upload.h"
-#include "video_core/gpu.h"
namespace Core {
class System;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 5d6d217bb..7399e760f 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -7,6 +7,7 @@
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
+#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
@@ -195,7 +196,7 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
- return StartCBData(method);
+ return ProcessCBData(argument);
case MAXWELL3D_REG_INDEX(cb_bind[0]):
return ProcessCBBind(0);
case MAXWELL3D_REG_INDEX(cb_bind[1]):
@@ -208,6 +209,19 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return ProcessCBBind(4);
case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
return DrawArrays();
+ case MAXWELL3D_REG_INDEX(small_index):
+ regs.index_array.count = regs.small_index.count;
+ regs.index_array.first = regs.small_index.first;
+ dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ return DrawArrays();
+ case MAXWELL3D_REG_INDEX(small_index_2):
+ regs.index_array.count = regs.small_index_2.count;
+ regs.index_array.first = regs.small_index_2.first;
+ dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ return DrawArrays();
+ case MAXWELL3D_REG_INDEX(topology_override):
+ use_topology_override = true;
+ return;
case MAXWELL3D_REG_INDEX(clear_buffers):
return ProcessClearBuffers();
case MAXWELL3D_REG_INDEX(query.query_get):
@@ -248,14 +262,6 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
}
void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
- if (method == cb_data_state.current) {
- regs.reg_array[method] = method_argument;
- ProcessCBData(method_argument);
- return;
- } else if (cb_data_state.current != null_cb_data) {
- FinishCBData();
- }
-
// It is an error to write to a register other than the current macro's ARG register before it
// has finished execution.
if (executing_macro != 0) {
@@ -302,7 +308,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
- ProcessCBMultiData(method, base_start, amount);
+ ProcessCBMultiData(base_start, amount);
break;
default:
for (std::size_t i = 0; i < amount; i++) {
@@ -360,6 +366,35 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
}
}
+void Maxwell3D::ProcessTopologyOverride() { // Resolves regs.topology_override into regs.draw.topology
+ using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology;
+ using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride;
+
+ PrimitiveTopology topology{};
+ // Choose the topology requested by the override register.
+ switch (regs.topology_override) {
+ case PrimitiveTopologyOverride::None:
+ topology = regs.draw.topology; // No override: keep whatever the draw register already holds
+ break;
+ case PrimitiveTopologyOverride::Points:
+ topology = PrimitiveTopology::Points; // Mapped by name rather than by numeric value
+ break;
+ case PrimitiveTopologyOverride::Lines:
+ topology = PrimitiveTopology::Lines;
+ break;
+ case PrimitiveTopologyOverride::LineStrip:
+ topology = PrimitiveTopology::LineStrip;
+ break;
+ default:
+ topology = static_cast<PrimitiveTopology>(regs.topology_override); // Every other value is reused numerically
+ break;
+ }
+ // The result is written back only when use_topology_override is true; otherwise regs.draw.topology is untouched.
+ if (use_topology_override) {
+ regs.draw.topology.Assign(topology);
+ }
+}
+
void Maxwell3D::FlushMMEInlineDraw() {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
regs.vertex_buffer.count);
@@ -370,6 +405,8 @@ void Maxwell3D::FlushMMEInlineDraw() {
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
+ ProcessTopologyOverride();
+
const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
if (ShouldExecute()) {
rasterizer->Draw(is_indexed, true);
@@ -529,6 +566,8 @@ void Maxwell3D::DrawArrays() {
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
+ ProcessTopologyOverride();
+
if (regs.draw.instance_next) {
// Increment the current instance *before* drawing.
state.current_instance += 1;
@@ -587,46 +626,7 @@ void Maxwell3D::ProcessCBBind(size_t stage_index) {
rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size);
}
-void Maxwell3D::ProcessCBData(u32 value) {
- const u32 id = cb_data_state.id;
- cb_data_state.buffer[id][cb_data_state.counter] = value;
- // Increment the current buffer position.
- regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
- cb_data_state.counter++;
-}
-
-void Maxwell3D::StartCBData(u32 method) {
- constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data);
- cb_data_state.start_pos = regs.const_buffer.cb_pos;
- cb_data_state.id = method - first_cb_data;
- cb_data_state.current = method;
- cb_data_state.counter = 0;
- ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
-}
-
-void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount) {
- if (cb_data_state.current != method) {
- if (cb_data_state.current != null_cb_data) {
- FinishCBData();
- }
- constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data);
- cb_data_state.start_pos = regs.const_buffer.cb_pos;
- cb_data_state.id = method - first_cb_data;
- cb_data_state.current = method;
- cb_data_state.counter = 0;
- }
- const std::size_t id = cb_data_state.id;
- const std::size_t size = amount;
- std::size_t i = 0;
- for (; i < size; i++) {
- cb_data_state.buffer[id][cb_data_state.counter] = start_base[i];
- cb_data_state.counter++;
- }
- // Increment the current buffer position.
- regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4 * amount;
-}
-
-void Maxwell3D::FinishCBData() {
+void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
// Write the input value to the current const buffer at the current position.
const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
ASSERT(buffer_address != 0);
@@ -634,14 +634,16 @@ void Maxwell3D::FinishCBData() {
// Don't allow writing past the end of the buffer.
ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
- const GPUVAddr address{buffer_address + cb_data_state.start_pos};
- const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
+ const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
+ const size_t copy_size = amount * sizeof(u32);
+ memory_manager.WriteBlock(address, start_base, copy_size);
- const u32 id = cb_data_state.id;
- memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
+ // Increment the current buffer position.
+ regs.const_buffer.cb_pos += static_cast<u32>(copy_size);
+}
- cb_data_state.id = null_cb_data;
- cb_data_state.current = null_cb_data;
+void Maxwell3D::ProcessCBData(u32 value) { // Single-word const buffer write
+ ProcessCBMultiData(&value, 1); // Forwarded as a multi-word write of length 1
}
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index dc9df6c8b..d36dc3daa 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -10,7 +10,6 @@
#include <limits>
#include <optional>
#include <type_traits>
-#include <unordered_map>
#include <vector>
#include "common/assert.h"
@@ -367,6 +366,22 @@ public:
Patches = 0xe,
};
+ // Values of the topology_override register; constants as from NVC0_3D_UNK1970_D3D
+ // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h#L1598
+ enum class PrimitiveTopologyOverride : u32 {
+ None = 0x0,
+ Points = 0x1,
+ Lines = 0x2,
+ LineStrip = 0x3,
+ Triangles = 0x4,
+ TriangleStrip = 0x5, // 0x6-0x9 are not enumerated here
+ LinesAdjacency = 0xa,
+ LineStripAdjacency = 0xb,
+ TrianglesAdjacency = 0xc,
+ TriangleStripAdjacency = 0xd,
+ Patches = 0xe,
+ };
+
enum class IndexFormat : u32 {
UnsignedByte = 0x0,
UnsignedShort = 0x1,
@@ -1200,7 +1215,17 @@ public:
}
} index_array;
- INSERT_PADDING_WORDS_NOINIT(0x7);
+ union {
+ BitField<0, 16, u32> first;
+ BitField<16, 16, u32> count;
+ } small_index;
+
+ union {
+ BitField<0, 16, u32> first;
+ BitField<16, 16, u32> count;
+ } small_index_2;
+
+ INSERT_PADDING_WORDS_NOINIT(0x5);
INSERT_PADDING_WORDS_NOINIT(0x1F);
@@ -1244,7 +1269,11 @@ public:
BitField<11, 1, u32> depth_clamp_disabled;
} view_volume_clip_control;
- INSERT_PADDING_WORDS_NOINIT(0x1F);
+ INSERT_PADDING_WORDS_NOINIT(0xC);
+
+ PrimitiveTopologyOverride topology_override;
+
+ INSERT_PADDING_WORDS_NOINIT(0x12);
u32 depth_bounds_enable;
@@ -1520,10 +1549,8 @@ private:
void ProcessSyncPoint();
/// Handles a write to the CB_DATA[i] register.
- void StartCBData(u32 method);
void ProcessCBData(u32 value);
- void ProcessCBMultiData(u32 method, const u32* start_base, u32 amount);
- void FinishCBData();
+ void ProcessCBMultiData(const u32* start_base, u32 amount);
/// Handles a write to the CB_BIND register.
void ProcessCBBind(size_t stage_index);
@@ -1531,6 +1558,9 @@ private:
/// Handles a write to the VERTEX_END_GL register, triggering a draw.
void DrawArrays();
+ /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro)
+ void ProcessTopologyOverride();
+
// Handles a instance drawcall from MME
void StepInstance(MMEDrawMode expected_mode, u32 count);
@@ -1555,20 +1585,10 @@ private:
/// Interpreter for the macro codes uploaded to the GPU.
std::unique_ptr<MacroEngine> macro_engine;
- static constexpr u32 null_cb_data = 0xFFFFFFFF;
- struct CBDataState {
- static constexpr size_t inline_size = 0x4000;
- std::array<std::array<u32, inline_size>, 16> buffer;
- u32 current{null_cb_data};
- u32 id{null_cb_data};
- u32 start_pos{};
- u32 counter{};
- };
- CBDataState cb_data_state;
-
Upload::State upload_state;
bool execute_on{true};
+ bool use_topology_override{false};
};
#define ASSERT_REG_POSITION(field_name, position) \
@@ -1685,6 +1705,7 @@ ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1);
ASSERT_REG_POSITION(index_array, 0x5F2);
+ASSERT_REG_POSITION(small_index, 0x5F9);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(vp_point_size, 0x644);
@@ -1694,6 +1715,7 @@ ASSERT_REG_POSITION(cull_face, 0x648);
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
+ASSERT_REG_POSITION(topology_override, 0x65C);
ASSERT_REG_POSITION(depth_bounds_enable, 0x66F);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 67388d980..1fc1358bc 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -53,7 +53,6 @@ void MaxwellDMA::Launch() {
// TODO(Subv): Perform more research and implement all features of this engine.
const LaunchDMA& launch = regs.launch_dma;
- ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
ASSERT(regs.dst_params.origin.x == 0);
@@ -79,6 +78,7 @@ void MaxwellDMA::Launch() {
CopyPitchToBlockLinear();
}
}
+ ReleaseSemaphore();
}
void MaxwellDMA::CopyPitchToPitch() {
@@ -244,4 +244,22 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
+void MaxwellDMA::ReleaseSemaphore() { // Invoked at the end of Launch(); performs the semaphore write selected by launch_dma.semaphore_type.
+ const auto type = regs.launch_dma.semaphore_type;
+ const GPUVAddr address = regs.semaphore.address;
+ switch (type) {
+ case LaunchDMA::SemaphoreType::NONE: // No semaphore requested: nothing is written.
+ break;
+ case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: // Single 32-bit write of the payload.
+ memory_manager.Write<u32>(address, regs.semaphore.payload);
+ break;
+ case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: // Two 64-bit writes: payload, then GPU ticks.
+ memory_manager.Write<u64>(address, static_cast<u64>(regs.semaphore.payload)); // Payload widened to 64 bits at offset 0.
+ memory_manager.Write<u64>(address + 8, system.GPU().GetTicks()); // Current GPU tick count at offset 8.
+ break;
+ default: // Any other encoding is reported as a programming error.
+ UNREACHABLE_MSG("Unknown semaphore type: {}", static_cast<u32>(type.Value()));
+ }
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index a04514425..9d0c77793 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -8,10 +8,8 @@
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
-#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/engines/engine_interface.h"
-#include "video_core/gpu.h"
namespace Core {
class System;
@@ -224,6 +222,8 @@ private:
void FastCopyBlockLinearToPitch();
+ void ReleaseSemaphore();
+
Core::System& system;
MemoryManager& memory_manager;
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 34dc6c596..f80d62c80 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -8,8 +8,6 @@
#include <queue>
#include "common/common_types.h"
-#include "common/settings.h"
-#include "core/core.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h
index b1d455e30..93349bb78 100644
--- a/src/video_core/framebuffer_config.h
+++ b/src/video_core/framebuffer_config.h
@@ -6,41 +6,22 @@
#include "common/common_types.h"
#include "common/math_util.h"
+#include "core/hle/service/nvflinger/buffer_transform_flags.h"
+#include "core/hle/service/nvflinger/pixel_format.h"
namespace Tegra {
+
/**
* Struct describing framebuffer configuration
*/
struct FramebufferConfig {
- enum class PixelFormat : u32 {
- A8B8G8R8_UNORM = 1,
- RGB565_UNORM = 4,
- B8G8R8A8_UNORM = 5,
- };
-
- enum class TransformFlags : u32 {
- /// No transform flags are set
- Unset = 0x00,
- /// Flip source image horizontally (around the vertical axis)
- FlipH = 0x01,
- /// Flip source image vertically (around the horizontal axis)
- FlipV = 0x02,
- /// Rotate source image 90 degrees clockwise
- Rotate90 = 0x04,
- /// Rotate source image 180 degrees
- Rotate180 = 0x03,
- /// Rotate source image 270 degrees clockwise
- Rotate270 = 0x07,
- };
-
VAddr address{};
u32 offset{};
u32 width{};
u32 height{};
u32 stride{};
- PixelFormat pixel_format{};
-
- TransformFlags transform_flags{};
+ Service::android::PixelFormat pixel_format{};
+ Service::android::BufferTransformFlags transform_flags{};
Common::Rectangle<int> crop_rect;
};
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index ba9ba082f..789af452d 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -230,7 +230,7 @@ struct GPU::Impl {
void IncrementSyncPoint(u32 syncpoint_id) {
auto& syncpoint = syncpoints.at(syncpoint_id);
syncpoint++;
- std::lock_guard lock{sync_mutex};
+ std::scoped_lock lock{sync_mutex};
sync_cv.notify_all();
auto& interrupt = syncpt_interrupts.at(syncpoint_id);
if (!interrupt.empty()) {
@@ -252,7 +252,7 @@ struct GPU::Impl {
}
void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
- std::lock_guard lock{sync_mutex};
+ std::scoped_lock lock{sync_mutex};
auto& interrupt = syncpt_interrupts.at(syncpoint_id);
bool contains = std::any_of(interrupt.begin(), interrupt.end(),
[value](u32 in_value) { return in_value == value; });
@@ -263,7 +263,7 @@ struct GPU::Impl {
}
[[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
- std::lock_guard lock{sync_mutex};
+ std::scoped_lock lock{sync_mutex};
auto& interrupt = syncpt_interrupts.at(syncpoint_id);
const auto iter =
std::find_if(interrupt.begin(), interrupt.end(),
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 26b8ea233..97c029140 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -8,6 +8,7 @@
#include "common/bit_field.h"
#include "common/common_types.h"
+#include "core/hle/service/nvdrv/nvdata.h"
#include "video_core/cdma_pusher.h"
#include "video_core/framebuffer_config.h"
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 9547f277a..4e8999915 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -56,7 +56,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
if (next.block) {
// We have to lock the write_lock to ensure that the condition_variable wait does not hit a
// race between the check and the lock itself.
- std::lock_guard lk(state.write_lock);
+ std::scoped_lock lk{state.write_lock};
state.cv.notify_all();
}
}
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index fd3e41434..190fc6aea 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -14,9 +14,11 @@ set(SHADER_FILES
convert_d24s8_to_abgr8.frag
convert_depth_to_float.frag
convert_float_to_depth.frag
+ convert_s8d24_to_abgr8.frag
full_screen_triangle.vert
fxaa.frag
fxaa.vert
+ opengl_convert_s8d24.comp
opengl_copy_bc4.comp
opengl_present.frag
opengl_present.vert
diff --git a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag
new file mode 100644
index 000000000..c8a1683b8
--- /dev/null
+++ b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag
@@ -0,0 +1,23 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 450
+
+layout(binding = 0) uniform sampler2D depth_tex;
+layout(binding = 1) uniform isampler2D stencil_tex;
+
+layout(location = 0) out vec4 color;
+
+void main() {
+ // Repack one depth/stencil texel into a four-channel color output.
+ ivec2 coord = ivec2(gl_FragCoord.xy);
+
+ // Scale the normalized depth sample to 32 bits; its three most significant bytes
+ // are extracted below and become the R, G and B channels.
+ highp uint depth_val =
+ uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0));
+ // The stencil sample is carried through unchanged into the alpha channel.
+ lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r;
+ highp uvec4 components =
+ uvec4((uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu, stencil_val);
+ // Normalize each 8-bit component back to the [0, 1] range.
+ color.rgba = vec4(components) / (exp2(8.0) - 1.0);
+}
diff --git a/src/video_core/host_shaders/opengl_convert_s8d24.comp b/src/video_core/host_shaders/opengl_convert_s8d24.comp
new file mode 100644
index 000000000..83e1ab176
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_convert_s8d24.comp
@@ -0,0 +1,18 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 430 core
+
+layout(local_size_x = 16, local_size_y = 8) in;
+
+layout(binding = 0, rgba8ui) restrict uniform uimage2D destination;
+layout(location = 0) uniform uvec3 size;
+
+void main() { // Rotates the four channels of each texel of `destination` in place.
+ if (any(greaterThanEqual(gl_GlobalInvocationID, size))) { // Skip invocations that fall outside the `size` uniform.
+ return;
+ }
+ uvec4 components = imageLoad(destination, ivec2(gl_GlobalInvocationID.xy)); // Read the texel at this invocation's xy position.
+ imageStore(destination, ivec2(gl_GlobalInvocationID.xy), components.wxyz); // Write it back as (w, x, y, z).
+}
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 4ff3fa268..722ebd9ad 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -11,7 +11,6 @@
#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_process.h"
#include "core/memory.h"
-#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 392f82eb7..8a84bcfa9 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -18,9 +18,7 @@
#include "common/assert.h"
#include "common/settings.h"
-#include "core/core.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index a99c33c37..9756a81d6 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include "common/logging/log.h"
-#include "common/settings.h"
#include "core/frontend/emu_window.h"
#include "video_core/renderer_base.h"
@@ -27,6 +26,10 @@ void RendererBase::UpdateCurrentFramebufferLayout() {
render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height);
}
+bool RendererBase::IsScreenshotPending() const { // Exposes the screenshot_requested flag that RequestScreenshot() also checks.
+ return renderer_settings.screenshot_requested;
+}
+
void RendererBase::RequestScreenshot(void* data, std::function<void(bool)> callback,
const Layout::FramebufferLayout& layout) {
if (renderer_settings.screenshot_requested) {
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index c5f974080..30d19b178 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -83,6 +83,9 @@ public:
/// Refreshes the settings common to all renderers
void RefreshBaseSettings();
+ /// Returns true if a screenshot is being processed
+ bool IsScreenshotPending() const;
+
/// Request a screenshot of the next frame
void RequestScreenshot(void* data, std::function<void(bool)> callback,
const Layout::FramebufferLayout& layout);
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index d4dd10bb6..f1f7b384b 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -135,6 +135,20 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
buffer.Create();
glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
}
+
+ device_access_memory = [this]() -> u64 {
+ if (device.CanReportMemoryUsage()) {
+ return device.GetCurrentDedicatedVideoMemory() + 512_MiB;
+ }
+ return 2_GiB; // Return minimum requirements
+ }();
+}
+
+u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { // Estimates memory in use from the baseline captured in the constructor.
+ if (device.CanReportMemoryUsage()) {
+ return device_access_memory - device.GetCurrentDedicatedVideoMemory(); // Baseline (initial reading + 512 MiB) minus the current reading.
+ }
+ return 2_GiB; // Fixed fallback when the device cannot report memory figures.
}
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 060d36427..a8699f28c 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -7,9 +7,7 @@
#include <array>
#include <span>
-#include "common/alignment.h"
#include "common/common_types.h"
-#include "common/dynamic_library.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_device.h"
@@ -91,6 +89,8 @@ public:
void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format);
+ u64 GetDeviceMemoryUsage() const;
+
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
const GLuint handle = fast_uniforms[stage][binding_index].handle;
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
@@ -153,6 +153,14 @@ public:
use_storage_buffers = use_storage_buffers_;
}
+ u64 GetDeviceLocalMemory() const {
+ return device_access_memory;
+ }
+
+ bool CanReportMemoryUsage() const {
+ return device.CanReportMemoryUsage();
+ }
+
private:
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -186,6 +194,8 @@ private:
std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms;
u32 index_buffer_offset = 0;
+
+ u64 device_access_memory;
};
struct BufferCacheParams {
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h
index 50c676365..b0d183b46 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h
@@ -6,7 +6,6 @@
#include <array>
#include <type_traits>
-#include <utility>
#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index e62912a22..597301eeb 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -6,8 +6,6 @@
#include <array>
#include <cstddef>
#include <cstdlib>
-#include <cstring>
-#include <limits>
#include <optional>
#include <span>
#include <stdexcept>
@@ -15,13 +13,15 @@
#include <glad/glad.h>
+#include "common/literals.h"
#include "common/logging/log.h"
-#include "common/scope_exit.h"
#include "common/settings.h"
#include "shader_recompiler/stage.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
+using namespace Common::Literals;
+
namespace OpenGL {
namespace {
constexpr std::array LIMIT_UBOS = {
@@ -168,6 +168,7 @@ Device::Device() {
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
need_fastmath_off = is_nvidia;
+ can_report_memory = GLAD_GL_NVX_gpu_memory_info;
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
@@ -279,4 +280,10 @@ void main() {
})");
}
+u64 Device::GetCurrentDedicatedVideoMemory() const {
+ // Query the dedicated video memory that is available *right now* (reported in KiB).
+ // Callers subtract this from a baseline captured at start-up to estimate usage, so it
+ // must be the live "current available" counter rather than the static total.
+ GLint cur_avail_mem_kb = 0;
+ glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &cur_avail_mem_kb);
+ // Convert KiB to bytes.
+ return static_cast<u64>(cur_avail_mem_kb) * 1_KiB;
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 95c2e8d38..9bb0b9148 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -20,6 +20,8 @@ public:
[[nodiscard]] std::string GetVendorName() const;
+ u64 GetCurrentDedicatedVideoMemory() const;
+
u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept {
return max_uniform_buffers[static_cast<size_t>(stage)];
}
@@ -168,6 +170,10 @@ public:
return vendor_name == "ATI Technologies Inc.";
}
+ bool CanReportMemoryUsage() const {
+ return can_report_memory;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -210,6 +216,7 @@ private:
bool need_fastmath_off{};
bool has_cbuf_ftou_bug{};
bool has_bool_ref_bug{};
+ bool can_report_memory{};
std::string vendor_name;
};
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index f8495896c..fd40966d5 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -243,10 +243,6 @@ GraphicsPipeline::GraphicsPipeline(
case Settings::ShaderBackend::GLASM:
if (!sources[stage].empty()) {
assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
- if (in_parallel) {
- // Make sure program is built before continuing when building in parallel
- glGetString(GL_PROGRAM_ERROR_STRING_NV);
- }
}
break;
case Settings::ShaderBackend::SPIRV:
@@ -256,20 +252,18 @@ GraphicsPipeline::GraphicsPipeline(
break;
}
}
- if (in_parallel && backend != Settings::ShaderBackend::GLASM) {
- // Make sure programs have built if we are building shaders in parallel
- for (OGLProgram& program : source_programs) {
- if (program.handle != 0) {
- GLint status{};
- glGetProgramiv(program.handle, GL_LINK_STATUS, &status);
- }
- }
+ if (in_parallel) {
+ std::scoped_lock lock{built_mutex};
+ built_fence.Create();
+ // Flush this context to ensure compilation commands and fence are in the GPU pipe.
+ glFlush();
+ built_condvar.notify_one();
+ } else {
+ is_built = true;
}
if (shader_notify) {
shader_notify->MarkShaderComplete();
}
- is_built = true;
- built_condvar.notify_one();
}};
if (thread_worker) {
thread_worker->QueueWork(std::move(func));
@@ -440,7 +434,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
buffer_cache.UpdateGraphicsBuffers(is_indexed);
buffer_cache.BindHostGeometryBuffers(is_indexed);
- if (!is_built.load(std::memory_order::relaxed)) {
+ if (!IsBuilt()) {
WaitForBuild();
}
const bool use_assembly{assembly_programs[0].handle != 0};
@@ -585,8 +579,26 @@ void GraphicsPipeline::GenerateTransformFeedbackState() {
}
void GraphicsPipeline::WaitForBuild() { // Blocks until the pipeline's build fence exists and has been signalled.
- std::unique_lock lock{built_mutex};
- built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
+ if (built_fence.handle == 0) { // Fence not created yet: wait for the builder to create it and notify.
+ std::unique_lock lock{built_mutex};
+ built_condvar.wait(lock, [this] { return built_fence.handle != 0; });
+ }
+ ASSERT(glClientWaitSync(built_fence.handle, 0, GL_TIMEOUT_IGNORED) != GL_WAIT_FAILED); // Wait with no timeout; NOTE(review): relies on ASSERT evaluating its argument in every build configuration - confirm.
+ is_built = true; // Cache the result so IsBuilt() can return early.
+}
+
+bool GraphicsPipeline::IsBuilt() noexcept { // Non-blocking query of the build state; caches a positive result in is_built.
+ if (is_built) { // Already known to be built: skip the GL query.
+ return true;
+ }
+ if (built_fence.handle == 0) { // No fence has been created yet, so the build cannot be complete.
+ return false;
+ }
+ // Timeout of zero means this is non-blocking
+ const auto sync_status = glClientWaitSync(built_fence.handle, 0, 0);
+ ASSERT(sync_status != GL_WAIT_FAILED);
+ is_built = sync_status != GL_TIMEOUT_EXPIRED; // Any status other than "timeout expired" is treated as built.
+ return is_built;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 4e28d9a42..4f8049717 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -14,7 +14,6 @@
#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
@@ -100,9 +99,7 @@ public:
return writes_global_memory;
}
- [[nodiscard]] bool IsBuilt() const noexcept {
- return is_built.load(std::memory_order::relaxed);
- }
+ [[nodiscard]] bool IsBuilt() noexcept;
template <typename Spec>
static auto MakeConfigureSpecFunc() {
@@ -154,7 +151,8 @@ private:
std::mutex built_mutex;
std::condition_variable built_condvar;
- std::atomic_bool is_built{false};
+ OGLSync built_fence{};
+ bool is_built{false};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index acebbf5f4..9e6c50055 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -3,15 +3,12 @@
// Refer to the license.txt file included.
#include <algorithm>
-#include <cstring>
#include <memory>
-#include <unordered_map>
#include <utility>
#include <vector>
#include <glad/glad.h>
-#include "common/assert.h"
#include "core/core.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 142412a8e..8ef79753f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -6,9 +6,7 @@
#include <array>
#include <bitset>
#include <memory>
-#include <string>
#include <string_view>
-#include <tuple>
#include <utility>
#include <glad/glad.h>
@@ -17,8 +15,9 @@
#include "common/logging/log.h"
#include "common/math_util.h"
#include "common/microprofile.h"
+#include "common/scope_exit.h"
#include "common/settings.h"
-#include "core/memory.h"
+
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
@@ -212,6 +211,7 @@ void RasterizerOpenGL::Clear() {
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
+ SCOPE_EXIT({ gpu.TickWork(); });
query_cache.UpdateCounters();
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
@@ -267,8 +267,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
++num_queued_commands;
has_written_global_memory |= pipeline->WritesGlobalMemory();
-
- gpu.TickWork();
}
void RasterizerOpenGL::DispatchCompute() {
@@ -522,6 +520,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
// ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
// ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
+ screen_info.texture.width = image_view->size.width;
+ screen_info.texture.height = image_view->size.height;
screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
return true;
@@ -559,12 +559,19 @@ void RasterizerOpenGL::SyncViewport() {
const bool dirty_viewport = flags[Dirty::Viewports] || rescale_viewports;
const bool dirty_clip_control = flags[Dirty::ClipControl];
- if (dirty_clip_control || flags[Dirty::FrontFace]) {
+ if (dirty_viewport || dirty_clip_control || flags[Dirty::FrontFace]) {
flags[Dirty::FrontFace] = false;
GLenum mode = MaxwellToGL::FrontFace(regs.front_face);
+ bool flip_faces = false;
if (regs.screen_y_control.triangle_rast_flip != 0 &&
regs.viewport_transform[0].scale_y < 0.0f) {
+ flip_faces = !flip_faces;
+ }
+ if (regs.viewport_transform[0].scale_z < 0.0f) {
+ flip_faces = !flip_faces;
+ }
+ if (flip_faces) {
switch (mode) {
case GL_CW:
mode = GL_CCW;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 98f6fd342..c79461d59 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -5,20 +5,14 @@
#pragma once
#include <array>
-#include <atomic>
#include <cstddef>
-#include <memory>
#include <optional>
-#include <tuple>
-#include <utility>
#include <boost/container/static_vector.hpp>
#include <glad/glad.h>
#include "common/common_types.h"
-#include "video_core/engines/const_buffer_info.h"
-#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
@@ -26,12 +20,8 @@
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
-#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/textures/texture.h"
namespace Core::Memory {
class Memory;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 5e7101d28..f6839a657 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -3,9 +3,7 @@
// Refer to the license.txt file included.
#include <string_view>
-#include <utility>
#include <glad/glad.h>
-#include "common/common_types.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index f71e01a34..05c5e702c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -14,10 +14,8 @@
#include "common/fs/fs.h"
#include "common/fs/path_util.h"
#include "common/logging/log.h"
-#include "common/scope_exit.h"
#include "common/settings.h"
#include "common/thread_worker.h"
-#include "core/core.h"
#include "shader_recompiler/backend/glasm/emit_glasm.h"
#include "shader_recompiler/backend/glsl/emit_glsl.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
@@ -29,7 +27,6 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
@@ -261,7 +258,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
[this, key, env = std::move(env), &state, &callback](Context* ctx) mutable {
ctx->pools.ReleaseContents();
auto pipeline{CreateComputePipeline(ctx->pools, key, env)};
- std::lock_guard lock{state.mutex};
+ std::scoped_lock lock{state.mutex};
if (pipeline) {
compute_cache.emplace(key, std::move(pipeline));
}
@@ -283,7 +280,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
}
ctx->pools.ReleaseContents();
auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)};
- std::lock_guard lock{state.mutex};
+ std::scoped_lock lock{state.mutex};
if (pipeline) {
graphics_cache.emplace(key, std::move(pipeline));
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index a34110b37..06d4b38bb 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -4,18 +4,13 @@
#pragma once
-#include <array>
#include <filesystem>
#include <stop_token>
#include <unordered_map>
-#include <glad/glad.h>
-
#include "common/common_types.h"
#include "common/thread_worker.h"
-#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/host_translate_info.h"
-#include "shader_recompiler/object_pool.h"
#include "shader_recompiler/profile.h"
#include "video_core/renderer_opengl/gl_compute_pipeline.h"
#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index d432072ad..129966e72 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -6,7 +6,6 @@
#include <vector>
#include <glad/glad.h>
-#include "common/assert.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 4e1a2a8e1..a64ef37dc 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -5,14 +5,10 @@
#pragma once
#include <span>
-#include <string>
#include <string_view>
-#include <vector>
#include <glad/glad.h>
-#include "common/assert.h"
-#include "common/logging/log.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 5864c7c07..550ed6d36 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -9,7 +9,6 @@
#include <glad/glad.h>
#include "common/common_types.h"
-#include "core/core.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index 2e67922a6..f0cb29dca 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -5,7 +5,6 @@
#pragma once
#include <array>
-#include <memory>
#include <span>
#include <utility>
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 3c1f79a27..f8c6e5c7e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -182,6 +182,26 @@ GLenum AttachmentType(PixelFormat format) {
}
}
+GLint ConvertA5B5G5R1_UNORM(SwizzleSource source) { // Maps a guest swizzle source to a GL swizzle value with the color components reversed; used by ApplySwizzle for A5B5G5R1_UNORM.
+ switch (source) {
+ case SwizzleSource::Zero:
+ return GL_ZERO;
+ case SwizzleSource::R: // R reads from alpha.
+ return GL_ALPHA;
+ case SwizzleSource::G: // G reads from blue.
+ return GL_BLUE;
+ case SwizzleSource::B: // B reads from green.
+ return GL_GREEN;
+ case SwizzleSource::A: // A reads from red.
+ return GL_RED;
+ case SwizzleSource::OneInt: // Both "one" sources map to the same GL constant.
+ case SwizzleSource::OneFloat:
+ return GL_ONE;
+ }
+ UNREACHABLE_MSG("Invalid swizzle source={}", source); // Reached only for a value outside the enumerators handled above.
+ return GL_NONE;
+}
+
void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4> swizzle) {
switch (format) {
case PixelFormat::D24_UNORM_S8_UINT:
@@ -192,6 +212,12 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
TextureMode(format, swizzle[0] == SwizzleSource::R));
std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
break;
+ case PixelFormat::A5B5G5R1_UNORM: {
+ std::array<GLint, 4> gl_swizzle;
+ std::ranges::transform(swizzle, gl_swizzle.begin(), ConvertA5B5G5R1_UNORM);
+ glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
+ return;
+ }
default:
break;
}
@@ -409,8 +435,8 @@ ImageBufferMap::~ImageBufferMap() {
TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
StateTracker& state_tracker_)
- : device{device_}, state_tracker{state_tracker_},
- util_shaders(program_manager), resolution{Settings::values.resolution_info} {
+ : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager),
+ format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} {
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i];
@@ -484,6 +510,13 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
rescale_read_fbos[i].Create();
}
}
+
+ device_access_memory = [this]() -> u64 {
+ if (device.CanReportMemoryUsage()) {
+ return device.GetCurrentDedicatedVideoMemory() + 512_MiB;
+ }
+ return 2_GiB; // Return minimum requirements
+ }();
}
TextureCacheRuntime::~TextureCacheRuntime() = default;
@@ -500,13 +533,11 @@ ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
return download_buffers.RequestMap(size, false);
}
-u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
- if (GLAD_GL_NVX_gpu_memory_info) {
- GLint cur_avail_mem_kb = 0;
- glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &cur_avail_mem_kb);
- return static_cast<u64>(cur_avail_mem_kb) * 1_KiB;
+u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { // Returns device_access_memory minus the device's current reading, or 2 GiB if the device cannot report
+ if (device.CanReportMemoryUsage()) {
+ return device_access_memory - device.GetCurrentDedicatedVideoMemory(); // NOTE(review): unsigned subtraction wraps if the reading exceeds device_access_memory - confirm it cannot
}
- return 2_GiB; // Return minimum requirements
+ return 2_GiB; // Fallback when usage cannot be queried; the removed code labelled this constant "minimum requirements"
}
void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
@@ -686,6 +717,7 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_,
}
if (IsConverted(runtime->device, info.format, info.type)) {
flags |= ImageFlagBits::Converted;
+ flags |= ImageFlagBits::CostlyLoad;
gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
gl_format = GL_RGBA;
gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
@@ -1319,6 +1351,9 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
Framebuffer::~Framebuffer() = default;
+FormatConversionPass::FormatConversionPass(UtilShaders& util_shaders_) // Binds the pass to the caller's UtilShaders by reference (no copy is made)
+ : util_shaders{util_shaders_} {} // ConvertImage calls util_shaders.ConvertS8D24 through this reference
+
void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies) {
const GLenum dst_target = ImageTarget(dst_image.info);
@@ -1351,6 +1386,12 @@ void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image,
dst_origin.z, region.width, region.height, region.depth,
dst_image.GlFormat(), dst_image.GlType(), nullptr);
}
+
+ // Swap the component order when an S8D24 image is reinterpreted as ABGR8
+ if (src_image.info.format == PixelFormat::D24_UNORM_S8_UINT &&
+ dst_image.info.format == PixelFormat::A8B8G8R8_UNORM) {
+ util_shaders.ConvertS8D24(dst_image, copies);
+ }
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 7f425631f..672fa8dde 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -10,6 +10,7 @@
#include <glad/glad.h>
#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/image_view_base.h"
@@ -21,7 +22,6 @@ struct ResolutionScalingInfo;
namespace OpenGL {
-class Device;
class ProgramManager;
class StateTracker;
@@ -55,13 +55,14 @@ struct FormatProperties {
class FormatConversionPass {
public:
- FormatConversionPass() = default;
+ explicit FormatConversionPass(UtilShaders& util_shaders); // Replaces the default constructor: a UtilShaders reference is now required
~FormatConversionPass() = default;
void ConvertImage(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies);
private:
+ UtilShaders& util_shaders; // Non-owning reference; the referenced UtilShaders must outlive this object
OGLBuffer intermediate_pbo;
size_t pbo_size{};
};
@@ -83,7 +84,15 @@ public:
ImageBufferMap DownloadStagingBuffer(size_t size);
- u64 GetDeviceLocalMemory() const;
+ u64 GetDeviceLocalMemory() const { // Now inline: no GL query here, just returns the stored value
+ return device_access_memory; // Assigned once at the end of the TextureCacheRuntime constructor
+ }
+
+ u64 GetDeviceMemoryUsage() const; // Defined in gl_texture_cache.cpp; returns a fixed 2 GiB when the device cannot report usage
+
+ bool CanReportMemoryUsage() const { // Thin pass-through so callers need not reach the Device directly
+ return device.CanReportMemoryUsage();
+ }
bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) {
return true;
@@ -172,6 +181,7 @@ private:
std::array<OGLFramebuffer, 4> rescale_draw_fbos;
std::array<OGLFramebuffer, 4> rescale_read_fbos;
const Settings::ResolutionScalingInfo& resolution;
+ u64 device_access_memory;
};
class Image : public VideoCommon::ImageBase {
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index db5bf1d30..03adf3d4c 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -30,6 +30,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
{GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
+ {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // A5B5G5R1_UNORM
{GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
{GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
{GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
@@ -87,6 +88,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
+ {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R4G4_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index f81c1b233..3a3c213bb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,7 +5,6 @@
#include <algorithm>
#include <cstddef>
#include <cstdlib>
-#include <cstring>
#include <memory>
#include <glad/glad.h>
@@ -15,11 +14,9 @@
#include "common/microprofile.h"
#include "common/settings.h"
#include "common/telemetry.h"
-#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
-#include "core/perf_stats.h"
#include "core/telemetry_session.h"
#include "video_core/host_shaders/fxaa_frag.h"
#include "video_core/host_shaders/fxaa_vert.h"
@@ -211,6 +208,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
// Framebuffer orientation handling
framebuffer_transform_flags = framebuffer.transform_flags;
framebuffer_crop_rect = framebuffer.crop_rect;
+ framebuffer_width = framebuffer.width;
+ framebuffer_height = framebuffer.height;
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
screen_info.was_accelerated =
@@ -326,12 +325,12 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
GLint internal_format;
switch (framebuffer.pixel_format) {
- case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
+ case Service::android::PixelFormat::Rgba8888:
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
break;
- case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
+ case Service::android::PixelFormat::Rgb565:
internal_format = GL_RGB565;
texture.gl_format = GL_RGB;
texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
@@ -467,8 +466,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
const auto& texcoords = screen_info.display_texcoords;
auto left = texcoords.left;
auto right = texcoords.right;
- if (framebuffer_transform_flags != Tegra::FramebufferConfig::TransformFlags::Unset) {
- if (framebuffer_transform_flags == Tegra::FramebufferConfig::TransformFlags::FlipV) {
+ if (framebuffer_transform_flags != Service::android::BufferTransformFlags::Unset) {
+ if (framebuffer_transform_flags == Service::android::BufferTransformFlags::FlipV) {
// Flip the framebuffer vertically
left = texcoords.right;
right = texcoords.left;
@@ -483,9 +482,12 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
ASSERT_MSG(framebuffer_crop_rect.top == 0, "Unimplemented");
ASSERT_MSG(framebuffer_crop_rect.left == 0, "Unimplemented");
+ f32 scale_u = static_cast<f32>(framebuffer_width) / static_cast<f32>(screen_info.texture.width);
+ f32 scale_v =
+ static_cast<f32>(framebuffer_height) / static_cast<f32>(screen_info.texture.height);
+
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
// (e.g. handheld mode) on a 1920x1080 framebuffer.
- f32 scale_u = 1.f, scale_v = 1.f;
if (framebuffer_crop_rect.GetWidth() > 0) {
scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
static_cast<f32>(screen_info.texture.width);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index cda333cad..ae9558a33 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -8,10 +8,12 @@
#include <glad/glad.h>
#include "common/common_types.h"
#include "common/math_util.h"
+
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
namespace Core {
@@ -44,7 +46,7 @@ struct TextureInfo {
GLsizei height;
GLenum gl_format;
GLenum gl_type;
- Tegra::FramebufferConfig::PixelFormat pixel_format;
+ Service::android::PixelFormat pixel_format;
};
/// Structure used for storing information about the display target for the Switch screen
@@ -133,8 +135,10 @@ private:
std::vector<u8> gl_framebuffer_data;
/// Used for transforming the framebuffer orientation
- Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags{};
+ Service::android::BufferTransformFlags framebuffer_transform_flags{};
Common::Rectangle<int> framebuffer_crop_rect;
+ u32 framebuffer_width{}; // Guest framebuffer width; zero until LoadFBToScreenInfo records it
+ u32 framebuffer_height{}; // Guest framebuffer height; zero until LoadFBToScreenInfo records it
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 897c380b3..04c482a09 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -13,6 +13,7 @@
#include "video_core/host_shaders/astc_decoder_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
+#include "video_core/host_shaders/opengl_convert_s8d24_comp.h"
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -50,7 +51,8 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
- copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
+ copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)),
+ convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)) {
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
swizzle_table_buffer.Create();
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
@@ -248,6 +250,26 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
program_manager.RestoreGuestCompute();
}
+void UtilShaders::ConvertS8D24(Image& dst_image, std::span<const ImageCopy> copies) { // Dispatches the convert_s8d24 compute program once per copy, in place on dst_image
+ static constexpr GLuint BINDING_DESTINATION = 0; // Image unit the destination texture is bound to
+ static constexpr GLuint LOC_SIZE = 0; // Uniform location that receives the copy extent
+
+ program_manager.BindComputeProgram(convert_s8d24_program.handle);
+ for (const ImageCopy& copy : copies) {
+ ASSERT(copy.src_subresource.base_layer == 0); // Only single-layer copies starting at layer 0 are accepted (this and the next three asserts)
+ ASSERT(copy.src_subresource.num_layers == 1);
+ ASSERT(copy.dst_subresource.base_layer == 0);
+ ASSERT(copy.dst_subresource.num_layers == 1);
+
+ glUniform3ui(LOC_SIZE, copy.extent.width, copy.extent.height, copy.extent.depth);
+ glBindImageTexture(BINDING_DESTINATION, dst_image.StorageHandle(), // Destination mip level bound as layered (GL_TRUE), read-write, viewed as RGBA8UI
+ copy.dst_subresource.base_level, GL_TRUE, 0, GL_READ_WRITE, GL_RGBA8UI);
+ glDispatchCompute(Common::DivCeil(copy.extent.width, 16u), // Group counts round up per 16x8 tile; assumes the shader's local size is 16x8 - TODO confirm
+ Common::DivCeil(copy.extent.height, 8u), copy.extent.depth);
+ }
+ program_manager.RestoreGuestCompute(); // Presumably rebinds the guest's compute program after the utility pass - confirm in ProgramManager
+}
+
GLenum StoreFormat(u32 bytes_per_block) {
switch (bytes_per_block) {
case 1:
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 5de95ea7a..5c132e67f 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -39,6 +39,8 @@ public:
void CopyBC4(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies);
+ void ConvertS8D24(Image& dst_image, std::span<const VideoCommon::ImageCopy> copies);
+
private:
ProgramManager& program_manager;
@@ -49,6 +51,7 @@ private:
OGLProgram block_linear_unswizzle_3d_program;
OGLProgram pitch_unswizzle_program;
OGLProgram copy_bc4_program;
+ OGLProgram convert_s8d24_program;
};
GLenum StoreFormat(u32 bytes_per_block);
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 2c3914459..abda1c490 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -9,6 +9,7 @@
#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
+#include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
@@ -366,16 +367,14 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
+ blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
+ convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)),
linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
- nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
- if (device.IsExtShaderStencilExportSupported()) {
- blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV);
- }
-}
+ nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {}
BlitImageHelper::~BlitImageHelper() = default;
@@ -474,6 +473,13 @@ void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer,
ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view);
}
+void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, // Dereferenced unconditionally below, so dst_framebuffer must be non-null
+ ImageView& src_image_view) {
+ ConvertPipelineColorTargetEx(convert_s8d24_to_abgr8_pipeline, dst_framebuffer->RenderPass(), // Sets up the S8D24 pipeline for this render pass (presumably created only on first use - confirm)
+ convert_s8d24_to_abgr8_frag);
+ ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view); // Same draw helper ConvertD24S8ToABGR8 uses, with the S8D24 pipeline
+}
+
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const VkPipelineLayout layout = *one_texture_pipeline_layout;
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 85e7dca5b..29ee0f67a 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -4,8 +4,6 @@
#pragma once
-#include <compare>
-
#include "video_core/engines/fermi_2d.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/texture_cache/types.h"
@@ -56,6 +54,8 @@ public:
void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
+ void ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
+
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view);
@@ -99,6 +99,7 @@ private:
vk::ShaderModule convert_float_to_depth_frag;
vk::ShaderModule convert_abgr8_to_d24s8_frag;
vk::ShaderModule convert_d24s8_to_abgr8_frag;
+ vk::ShaderModule convert_s8d24_to_abgr8_frag;
vk::Sampler linear_sampler;
vk::Sampler nearest_sampler;
@@ -112,6 +113,7 @@ private:
vk::Pipeline convert_r16_to_d16_pipeline;
vk::Pipeline convert_abgr8_to_d24s8_pipeline;
vk::Pipeline convert_d24s8_to_abgr8_pipeline;
+ vk::Pipeline convert_s8d24_to_abgr8_pipeline;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index d70153df3..c2259ac5f 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -4,9 +4,6 @@
#include <algorithm>
#include <cstring>
-#include <tuple>
-
-#include <boost/functional/hash.hpp>
#include "common/bit_cast.h"
#include "common/cityhash.h"
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 1c136c410..a2c6d0e6c 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -127,6 +127,7 @@ struct FormatTuple {
{VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
{VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT
{VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle)
+ {VK_FORMAT_R5G5B5A1_UNORM_PACK16}, // A5B5G5R1_UNORM (specially swizzled)
{VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM
{VK_FORMAT_R8_SNORM, Attachable | Storage}, // R8_SNORM
{VK_FORMAT_R8_SINT, Attachable | Storage}, // R8_SINT
@@ -184,6 +185,7 @@ struct FormatTuple {
{VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB
{VK_FORMAT_BC7_SRGB_BLOCK}, // BC7_SRGB
{VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // A4B4G4R4_UNORM
+ {VK_FORMAT_R4G4_UNORM_PACK8}, // R4G4_UNORM
{VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB
{VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB
{VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 8a9616039..1c1f420f2 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -4,7 +4,6 @@
#pragma once
-#include "common/common_types.h"
#include "shader_recompiler/stage.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index 11c160570..c25d469e6 100644
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -8,7 +8,6 @@
#include <boost/container/small_vector.hpp>
-#include "common/assert.h"
#include "common/common_types.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/shader_info.h"
@@ -16,7 +15,6 @@
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/types.h"
-#include "video_core/textures/texture.h"
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
diff --git a/src/video_core/renderer_vulkan/pipeline_statistics.cpp b/src/video_core/renderer_vulkan/pipeline_statistics.cpp
index bfec931a6..7ccadf084 100644
--- a/src/video_core/renderer_vulkan/pipeline_statistics.cpp
+++ b/src/video_core/renderer_vulkan/pipeline_statistics.cpp
@@ -57,7 +57,7 @@ void PipelineStatistics::Collect(VkPipeline pipeline) {
stage_stats.basic_block_count = GetUint64(statistic);
}
}
- std::lock_guard lock{mutex};
+ std::scoped_lock lock{mutex};
collected_stats.push_back(stage_stats);
}
}
@@ -66,7 +66,7 @@ void PipelineStatistics::Report() const {
double num{};
Stats total;
{
- std::lock_guard lock{mutex};
+ std::scoped_lock lock{mutex};
for (const Stats& stats : collected_stats) {
total.code_size += stats.code_size;
total.register_count += stats.register_count;
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 74822814d..ef57fdfa4 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -13,16 +13,15 @@
#include <fmt/format.h>
#include "common/logging/log.h"
+#include "common/scope_exit.h"
#include "common/settings.h"
#include "common/telemetry.h"
-#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
#include "core/telemetry_session.h"
#include "video_core/gpu.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
-#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 6dc985109..9680108b6 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -6,7 +6,6 @@
#include <memory>
#include <string>
-#include <vector>
#include "common/dynamic_library.h"
#include "video_core/renderer_base.h"
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 621a6a071..b866e9103 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -6,7 +6,6 @@
#include <array>
#include <cstring>
#include <memory>
-#include <tuple>
#include <vector>
#include "common/assert.h"
@@ -28,7 +27,6 @@
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_fsr.h"
-#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
@@ -96,11 +94,11 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
switch (framebuffer.pixel_format) {
- case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
+ case Service::android::PixelFormat::Rgba8888:
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
- case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
+ case Service::android::PixelFormat::Rgb565:
return VK_FORMAT_R5G6B5_UNORM_PACK16;
- case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM:
+ case Service::android::PixelFormat::Bgra8888:
return VK_FORMAT_B8G8R8A8_UNORM;
default:
UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
@@ -1392,9 +1390,9 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
auto right = texcoords.right;
switch (framebuffer_transform_flags) {
- case Tegra::FramebufferConfig::TransformFlags::Unset:
+ case Service::android::BufferTransformFlags::Unset:
break;
- case Tegra::FramebufferConfig::TransformFlags::FlipV:
+ case Service::android::BufferTransformFlags::FlipV:
// Flip the framebuffer vertically
left = texcoords.right;
right = texcoords.left;
@@ -1408,8 +1406,9 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0);
UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
- f32 scale_u = 1.0f;
- f32 scale_v = 1.0f;
+ f32 scale_u = static_cast<f32>(framebuffer.width) / static_cast<f32>(screen_info.width);
+ f32 scale_v = static_cast<f32>(framebuffer.height) / static_cast<f32>(screen_info.height);
+
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
// (e.g. handheld mode) on a 1920x1080 framebuffer.
if (!fsr) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 5ffd93499..def838c34 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -141,6 +141,18 @@ StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
return staging_pool.Request(size, MemoryUsage::Download);
}
+u64 BufferCacheRuntime::GetDeviceLocalMemory() const { // Forwards to the device object; no caching or adjustment here
+ return device.GetDeviceLocalMemory();
+}
+
+u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { // Forwards to the device object; no caching or adjustment here
+ return device.GetDeviceMemoryUsage();
+}
+
+bool BufferCacheRuntime::CanReportMemoryUsage() const { // Forwards to the device object
+ return device.CanReportMemoryUsage();
+}
+
void BufferCacheRuntime::Finish() {
scheduler.Finish();
}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 1ee0d8420..d7fdd18ff 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -65,6 +65,12 @@ public:
void Finish();
+ u64 GetDeviceLocalMemory() const;
+
+ u64 GetDeviceMemoryUsage() const;
+
+ bool CanReportMemoryUsage() const;
+
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 3e96c0f60..713794410 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -2,12 +2,11 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <cstring>
+#include <array>
#include <memory>
#include <optional>
#include <utility>
-#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
@@ -22,7 +21,6 @@
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
#include "video_core/texture_cache/types.h"
-#include "video_core/textures/astc.h"
#include "video_core/textures/decoders.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -292,7 +290,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
};
- const std::array push_constants{base_vertex, index_shift};
+ const std::array<u32, 2> push_constants{base_vertex, index_shift};
const VkDescriptorSet set = descriptor_allocator.Commit();
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index de36bcdb7..97b3594c2 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -77,7 +77,7 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript
if (pipeline_statistics) {
pipeline_statistics->Collect(*pipeline);
}
- std::lock_guard lock{build_mutex};
+ std::scoped_lock lock{build_mutex};
is_built = true;
build_condvar.notify_one();
if (shader_notify) {
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 8c4b0a301..c64bd9a06 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -11,7 +11,6 @@
#include "common/common_types.h"
#include "common/thread_worker.h"
#include "shader_recompiler/shader_info.h"
-#include "video_core/memory_manager.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 3bec48d14..0c1098c8f 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -9,7 +9,6 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/vulkan_common/vulkan_device.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 2f8322d29..cf9f4adbf 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -9,7 +9,6 @@
#include "video_core/fence_manager.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
class System;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index d514b71d0..8959d6059 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -258,7 +258,7 @@ GraphicsPipeline::GraphicsPipeline(
pipeline_statistics->Collect(*pipeline);
}
- std::lock_guard lock{build_mutex};
+ std::scoped_lock lock{build_mutex};
is_built = true;
build_condvar.notify_one();
if (shader_notify) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a633b73e5..336d1e9dc 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -16,13 +16,11 @@
#include "common/microprofile.h"
#include "common/thread_worker.h"
#include "core/core.h"
-#include "core/memory.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/frontend/maxwell/translate_program.h"
#include "shader_recompiler/program_header.h"
-#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
@@ -406,7 +404,7 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable {
ShaderPools pools;
auto pipeline{CreateComputePipeline(pools, key, env, state.statistics.get(), false)};
- std::lock_guard lock{state.mutex};
+ std::scoped_lock lock{state.mutex};
if (pipeline) {
compute_cache.emplace(key, std::move(pipeline));
}
@@ -436,7 +434,7 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs),
state.statistics.get(), false)};
- std::lock_guard lock{state.mutex};
+ std::scoped_lock lock{state.mutex};
graphics_cache.emplace(key, std::move(pipeline));
++state.built;
if (state.has_loaded) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 4c135b5dd..579e25a4a 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -7,11 +7,9 @@
#include <array>
#include <cstddef>
#include <filesystem>
-#include <iosfwd>
#include <memory>
#include <type_traits>
#include <unordered_map>
-#include <utility>
#include <vector>
#include "common/common_types.h"
@@ -29,7 +27,6 @@
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/shader_cache.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
class System;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 2227d9197..fa87d37f8 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -6,15 +6,12 @@
#include <array>
#include <memory>
#include <mutex>
-#include <vector>
-#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/settings.h"
-#include "core/core.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/blit_image.h"
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 5af2e275b..c25036fb3 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -5,10 +5,6 @@
#pragma once
#include <array>
-#include <bitset>
-#include <memory>
-#include <utility>
-#include <vector>
#include <boost/container/static_vector.hpp>
@@ -17,14 +13,12 @@
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_vulkan/blit_image.h"
-#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
-#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
index 451ffe019..d22bb6694 100644
--- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
@@ -36,7 +36,7 @@ VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat
RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {}
VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
- std::lock_guard lock{mutex};
+ std::scoped_lock lock{mutex};
const auto [pair, is_new] = cache.try_emplace(key);
if (!is_new) {
return *pair->second;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 7d9d4f7ba..6a9416457 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -4,7 +4,6 @@
#include <memory>
#include <mutex>
-#include <optional>
#include <thread>
#include <utility>
@@ -74,7 +73,7 @@ void VKScheduler::DispatchWork() {
return;
}
{
- std::lock_guard lock{work_mutex};
+ std::scoped_lock lock{work_mutex};
work_queue.push(std::move(chunk));
}
work_cv.notify_one();
@@ -158,7 +157,7 @@ void VKScheduler::WorkerThread(std::stop_token stop_token) {
if (has_submit) {
AllocateWorkerCommandBuffer();
}
- std::lock_guard reserve_lock{reserve_mutex};
+ std::scoped_lock reserve_lock{reserve_mutex};
chunk_reserve.push_back(std::move(work));
} while (!stop_token.stop_requested());
}
@@ -283,7 +282,7 @@ void VKScheduler::EndRenderPass() {
}
void VKScheduler::AcquireNewChunk() {
- std::lock_guard lock{reserve_mutex};
+ std::scoped_lock lock{reserve_mutex};
if (chunk_reserve.empty()) {
chunk = std::make_unique<CommandChunk>();
return;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index e69aa136b..25c5e6ca1 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -4,7 +4,6 @@
#pragma once
-#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <memory>
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp
index aaad4f292..e8e339f3c 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -3,9 +3,7 @@
// Refer to the license.txt file included.
#include <cstring>
-#include <memory>
-#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/vulkan_common/vulkan_device.h"
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 5d5329abf..64a58304b 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -118,7 +118,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.image = nullptr,
.buffer = *stream_buffer,
};
- const auto memory_properties = device.GetPhysical().GetMemoryProperties();
+ const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties;
VkMemoryAllocateInfo stream_memory_info{
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = make_dedicated ? &dedicated_info : nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index c00913f55..1e597f98c 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -5,7 +5,6 @@
#include <algorithm>
#include <array>
#include <cstddef>
-#include <iterator>
#include "common/common_types.h"
#include "core/core.h"
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 40a149832..8240c83e1 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -8,7 +8,6 @@
#include <limits>
#include "common/common_types.h"
-#include "core/core.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 8972a6921..ce744f4ca 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -7,11 +7,9 @@
#include <limits>
#include <vector>
-#include "common/assert.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "core/core.h"
-#include "core/frontend/framebuffer_layout.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/vulkan_common/vulkan_device.h"
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 0f62779de..49691ce0c 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -438,6 +438,32 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
}
}
+[[nodiscard]] SwizzleSource SwapGreenRed(SwizzleSource value) {
+ switch (value) {
+ case SwizzleSource::R:
+ return SwizzleSource::G;
+ case SwizzleSource::G:
+ return SwizzleSource::R;
+ default:
+ return value;
+ }
+}
+
+[[nodiscard]] SwizzleSource SwapSpecial(SwizzleSource value) {
+ switch (value) {
+ case SwizzleSource::A:
+ return SwizzleSource::R;
+ case SwizzleSource::R:
+ return SwizzleSource::A;
+ case SwizzleSource::G:
+ return SwizzleSource::B;
+ case SwizzleSource::B:
+ return SwizzleSource::G;
+ default:
+ return value;
+ }
+}
+
void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
VkImageAspectFlags aspect_mask, bool is_initialized,
std::span<const VkBufferImageCopy> copies) {
@@ -554,14 +580,25 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im
};
}
-[[nodiscard]] bool IsFormatFlipped(PixelFormat format, bool emulate_bgr565) {
+void TryTransformSwizzleIfNeeded(PixelFormat format, std::array<SwizzleSource, 4>& swizzle,
+ bool emulate_bgr565) {
switch (format) {
case PixelFormat::A1B5G5R5_UNORM:
- return true;
+ std::ranges::transform(swizzle, swizzle.begin(), SwapBlueRed);
+ break;
case PixelFormat::B5G6R5_UNORM:
- return emulate_bgr565;
+ if (emulate_bgr565) {
+ std::ranges::transform(swizzle, swizzle.begin(), SwapBlueRed);
+ }
+ break;
+ case PixelFormat::A5B5G5R1_UNORM:
+ std::ranges::transform(swizzle, swizzle.begin(), SwapSpecial);
+ break;
+ case PixelFormat::R4G4_UNORM:
+ std::ranges::transform(swizzle, swizzle.begin(), SwapGreenRed);
+ break;
default:
- return false;
+ break;
}
}
@@ -781,11 +818,6 @@ bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
!device.IsExtShaderStencilExportSupported()) {
return true;
}
- if (VideoCore::Surface::GetFormatType(src.info.format) ==
- VideoCore::Surface::SurfaceType::DepthStencil &&
- !device.IsExtShaderStencilExportSupported()) {
- return true;
- }
if (dst.info.format == PixelFormat::D32_FLOAT_S8_UINT ||
src.info.format == PixelFormat::D32_FLOAT_S8_UINT) {
return true;
@@ -1070,6 +1102,9 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view);
}
+ if (src_view.format == PixelFormat::D24_UNORM_S8_UINT) {
+ return blit_image_helper.ConvertS8D24ToABGR8(dst, src_view);
+ }
break;
case PixelFormat::R32_FLOAT:
if (src_view.format == PixelFormat::D32_FLOAT) {
@@ -1191,6 +1226,14 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
return device.GetDeviceLocalMemory();
}
+u64 TextureCacheRuntime::GetDeviceMemoryUsage() const {
+ return device.GetDeviceMemoryUsage();
+}
+
+bool TextureCacheRuntime::CanReportMemoryUsage() const {
+ return device.CanReportMemoryUsage();
+}
+
void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
@@ -1205,6 +1248,7 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
} else {
flags |= VideoCommon::ImageFlagBits::Converted;
}
+ flags |= VideoCommon::ImageFlagBits::CostlyLoad;
}
if (runtime->device.HasDebuggingToolAttached()) {
original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
@@ -1444,8 +1488,7 @@ bool Image::BlitScaleHelper(bool scale_up) {
runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), color_view, dst_region,
src_region, operation, BLIT_OPERATION);
- } else if (!runtime->device.IsBlitDepthStencilSupported() &&
- aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+ } else if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
if (!blit_framebuffer) {
blit_framebuffer = std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent);
}
@@ -1490,9 +1533,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
};
if (!info.IsRenderTarget()) {
swizzle = info.Swizzle();
- if (IsFormatFlipped(format, device->MustEmulateBGR565())) {
- std::ranges::transform(swizzle, swizzle.begin(), SwapBlueRed);
- }
+ TryTransformSwizzleIfNeeded(format, swizzle, device->MustEmulateBGR565());
if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) {
std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
}
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index c81130dd2..cb15b4a1c 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -55,6 +55,10 @@ public:
u64 GetDeviceLocalMemory() const;
+ u64 GetDeviceMemoryUsage() const;
+
+ bool CanReportMemoryUsage() const;
+
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 0df3a7fe9..89f1b508d 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -5,7 +5,6 @@
#include <variant>
#include <boost/container/static_vector.hpp>
-#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index d7de4c490..971a4eb34 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -6,7 +6,6 @@
#include <array>
-#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index 87636857d..75031767a 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -25,7 +25,7 @@ void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
}
void ShaderCache::OnCPUWrite(VAddr addr, size_t size) {
- std::lock_guard lock{invalidation_mutex};
+ std::scoped_lock lock{invalidation_mutex};
InvalidatePagesInRegion(addr, size);
}
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 3e673c437..3c22124c4 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include <algorithm>
-#include <bit>
#include <filesystem>
#include <fstream>
#include <memory>
diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp
index bcaf5f575..ce8925896 100644
--- a/src/video_core/shader_notify.cpp
+++ b/src/video_core/shader_notify.cpp
@@ -4,7 +4,6 @@
#include <atomic>
#include <chrono>
-#include <optional>
#include "video_core/shader_notify.h"
diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h
index 4d8d52071..538cda28a 100644
--- a/src/video_core/shader_notify.h
+++ b/src/video_core/shader_notify.h
@@ -6,7 +6,6 @@
#include <atomic>
#include <chrono>
-#include <optional>
namespace VideoCore {
class ShaderNotify {
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index a36015c8c..5f428d35d 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -190,13 +190,13 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
}
}
-PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
+PixelFormat PixelFormatFromGPUPixelFormat(Service::android::PixelFormat format) {
switch (format) {
- case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
+ case Service::android::PixelFormat::Rgba8888:
return PixelFormat::A8B8G8R8_UNORM;
- case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
+ case Service::android::PixelFormat::Rgb565:
return PixelFormat::R5G6B5_UNORM;
- case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM:
+ case Service::android::PixelFormat::Bgra8888:
return PixelFormat::B8G8R8A8_UNORM;
default:
UNIMPLEMENTED_MSG("Unimplemented format={}", format);
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 33e8d24ab..86fea61ae 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -25,6 +25,7 @@ enum class PixelFormat {
A2B10G10R10_UNORM,
A2B10G10R10_UINT,
A1B5G5R5_UNORM,
+ A5B5G5R1_UNORM,
R8_UNORM,
R8_SNORM,
R8_SINT,
@@ -82,6 +83,7 @@ enum class PixelFormat {
BC3_SRGB,
BC7_SRGB,
A4B4G4R4_UNORM,
+ R4G4_UNORM,
ASTC_2D_4X4_SRGB,
ASTC_2D_8X8_SRGB,
ASTC_2D_8X5_SRGB,
@@ -156,6 +158,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
1, // A2B10G10R10_UNORM
1, // A2B10G10R10_UINT
1, // A1B5G5R5_UNORM
+ 1, // A5B5G5R1_UNORM
1, // R8_UNORM
1, // R8_SNORM
1, // R8_SINT
@@ -213,6 +216,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
4, // BC3_SRGB
4, // BC7_SRGB
1, // A4B4G4R4_UNORM
+ 1, // R4G4_UNORM
4, // ASTC_2D_4X4_SRGB
8, // ASTC_2D_8X8_SRGB
8, // ASTC_2D_8X5_SRGB
@@ -256,6 +260,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
1, // A2B10G10R10_UNORM
1, // A2B10G10R10_UINT
1, // A1B5G5R5_UNORM
+ 1, // A5B5G5R1_UNORM
1, // R8_UNORM
1, // R8_SNORM
1, // R8_SINT
@@ -313,6 +318,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
4, // BC3_SRGB
4, // BC7_SRGB
1, // A4B4G4R4_UNORM
+ 1, // R4G4_UNORM
4, // ASTC_2D_4X4_SRGB
8, // ASTC_2D_8X8_SRGB
5, // ASTC_2D_8X5_SRGB
@@ -356,6 +362,7 @@ constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
32, // A2B10G10R10_UNORM
32, // A2B10G10R10_UINT
16, // A1B5G5R5_UNORM
+ 16, // A5B5G5R1_UNORM
8, // R8_UNORM
8, // R8_SNORM
8, // R8_SINT
@@ -413,6 +420,7 @@ constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
128, // BC3_SRGB
128, // BC7_UNORM
16, // A4B4G4R4_UNORM
+ 8, // R4G4_UNORM
128, // ASTC_2D_4X4_SRGB
128, // ASTC_2D_8X8_SRGB
128, // ASTC_2D_8X5_SRGB
@@ -460,7 +468,7 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format);
PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format);
-PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format);
+PixelFormat PixelFormatFromGPUPixelFormat(Service::android::PixelFormat format);
SurfaceType GetFormatType(PixelFormat pixel_format);
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h
index 3a03b786f..318bd5214 100644
--- a/src/video_core/texture_cache/descriptor_table.h
+++ b/src/video_core/texture_cache/descriptor_table.h
@@ -9,7 +9,6 @@
#include "common/common_types.h"
#include "common/div_ceil.h"
-#include "common/logging/log.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index afa807d5d..20e64a7c2 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -63,6 +63,10 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
return PixelFormat::A1B5G5R5_UNORM;
case Hash(TextureFormat::A4B4G4R4, UNORM):
return PixelFormat::A4B4G4R4_UNORM;
+ case Hash(TextureFormat::G4R4, UNORM):
+ return PixelFormat::R4G4_UNORM;
+ case Hash(TextureFormat::A5B5G5R1, UNORM):
+ return PixelFormat::A5B5G5R1_UNORM;
case Hash(TextureFormat::R8, UNORM):
return PixelFormat::R8_UNORM;
case Hash(TextureFormat::R8, SNORM):
@@ -143,6 +147,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
return PixelFormat::S8_UINT_D24_UNORM;
case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):
return PixelFormat::S8_UINT_D24_UNORM;
+ case Hash(TextureFormat::D24S8, UNORM, UINT, UINT, UINT, LINEAR):
+ return PixelFormat::D24_UNORM_S8_UINT;
case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
return PixelFormat::D32_FLOAT_S8_UINT;
case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR):
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index b2c81057b..6f5afc5a9 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -38,6 +38,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
return "A2B10G10R10_UINT";
case PixelFormat::A1B5G5R5_UNORM:
return "A1B5G5R5_UNORM";
+ case PixelFormat::A5B5G5R1_UNORM:
+ return "A5B5G5R1_UNORM";
case PixelFormat::R8_UNORM:
return "R8_UNORM";
case PixelFormat::R8_SNORM:
@@ -152,6 +154,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
return "BC7_SRGB";
case PixelFormat::A4B4G4R4_UNORM:
return "A4B4G4R4_UNORM";
+ case PixelFormat::R4G4_UNORM:
+ return "R4G4_UNORM";
case PixelFormat::ASTC_2D_4X4_SRGB:
return "ASTC_2D_4X4_SRGB";
case PixelFormat::ASTC_2D_8X8_SRGB:
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 89c111c00..dd0106432 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -33,11 +33,12 @@ enum class ImageFlagBits : u32 {
///< garbage collection priority
Alias = 1 << 11, ///< This image has aliases and has priority on garbage
///< collection
+ CostlyLoad = 1 << 12, ///< Protected from low-tier GC as it is costly to load back.
// Rescaler
- Rescaled = 1 << 12,
- CheckingRescalable = 1 << 13,
- IsRescalable = 1 << 14,
+ Rescaled = 1 << 13,
+ CheckingRescalable = 1 << 14,
+ IsRescalable = 1 << 15,
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h
index 0cb227d69..f8f13e84c 100644
--- a/src/video_core/texture_cache/render_targets.h
+++ b/src/video_core/texture_cache/render_targets.h
@@ -6,7 +6,6 @@
#include <algorithm>
#include <span>
-#include <utility>
#include "common/bit_cast.h"
#include "video_core/texture_cache/types.h"
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
index 50df06409..6aabaef7b 100644
--- a/src/video_core/texture_cache/slot_vector.h
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -5,9 +5,7 @@
#pragma once
#include <algorithm>
-#include <array>
#include <bit>
-#include <concepts>
#include <numeric>
#include <type_traits>
#include <utility>
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 198bb0cfb..8fef74117 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -50,14 +50,20 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
void(slot_samplers.insert(runtime, sampler_descriptor));
if constexpr (HAS_DEVICE_MEMORY_INFO) {
- const auto device_memory = runtime.GetDeviceLocalMemory();
- const u64 possible_expected_memory = (device_memory * 4) / 10;
- const u64 possible_critical_memory = (device_memory * 7) / 10;
- expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB);
- critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB);
- minimum_memory = 0;
+ const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
+ const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB;
+ const s64 min_spacing_critical = device_memory - 1_GiB;
+ const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
+ const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
+ const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
+ expected_memory = static_cast<u64>(
+ std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
+ DEFAULT_EXPECTED_MEMORY));
+ critical_memory = static_cast<u64>(
+ std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
+ DEFAULT_CRITICAL_MEMORY));
+ minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2);
} else {
- // On OpenGL we can be more conservatives as the driver takes care.
expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
minimum_memory = 0;
@@ -66,18 +72,21 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
template <class P>
void TextureCache<P>::RunGarbageCollector() {
- const bool high_priority_mode = total_used_memory >= expected_memory;
- const bool aggressive_mode = total_used_memory >= critical_memory;
- const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL;
- size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10);
- const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) {
+ bool high_priority_mode = total_used_memory >= expected_memory;
+ bool aggressive_mode = total_used_memory >= critical_memory;
+ const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
+ size_t num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
+ const auto clean_up = [this, &num_iterations, &high_priority_mode,
+ &aggressive_mode](ImageId image_id) {
if (num_iterations == 0) {
return true;
}
--num_iterations;
auto& image = slot_images[image_id];
- const bool must_download = image.IsSafeDownload();
- if (!high_priority_mode && must_download) {
+ const bool must_download =
+ image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
+ if (!high_priority_mode &&
+ (must_download || True(image.flags & ImageFlagBits::CostlyLoad))) {
return false;
}
if (must_download) {
@@ -92,6 +101,18 @@ void TextureCache<P>::RunGarbageCollector() {
}
UnregisterImage(image_id);
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
+ if (total_used_memory < critical_memory) {
+ if (aggressive_mode) {
+ // Sink the aggresiveness.
+ num_iterations >>= 2;
+ aggressive_mode = false;
+ return false;
+ }
+ if (high_priority_mode && total_used_memory < expected_memory) {
+ num_iterations >>= 1;
+ high_priority_mode = false;
+ }
+ }
return false;
};
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
@@ -99,6 +120,10 @@ void TextureCache<P>::RunGarbageCollector() {
template <class P>
void TextureCache<P>::TickFrame() {
+ // If we can obtain the memory info, use it instead of the estimate.
+ if (runtime.CanReportMemoryUsage()) {
+ total_used_memory = runtime.GetDeviceMemoryUsage();
+ }
if (total_used_memory > minimum_memory) {
RunGarbageCollector();
}
@@ -106,6 +131,7 @@ void TextureCache<P>::TickFrame() {
sentenced_framebuffers.Tick();
sentenced_image_view.Tick();
runtime.TickFrame();
+ critical_gc = 0;
++frame_tick;
}
@@ -343,7 +369,7 @@ template <bool has_blacklists>
void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids,
std::span<ImageViewInOut> views) {
- bool has_blacklisted;
+ bool has_blacklisted = false;
do {
has_deleted_images = false;
if constexpr (has_blacklists) {
@@ -1052,6 +1078,9 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
for (const ImageId overlap_id : overlap_ids) {
Image& overlap = slot_images[overlap_id];
+ if (True(overlap.flags & ImageFlagBits::GpuModified)) {
+ new_image.flags |= ImageFlagBits::GpuModified;
+ }
if (overlap.info.num_samples != new_image.info.num_samples) {
LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
} else {
@@ -1414,6 +1443,10 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory += Common::AlignUp(tentative_size, 1024);
+ if (total_used_memory > critical_memory && critical_gc < GC_EMERGENCY_COUNTS) {
+ RunGarbageCollector();
+ critical_gc++;
+ }
image.lru_index = lru_cache.Insert(image_id, frame_tick);
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
@@ -1704,6 +1737,9 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
aliased_images.push_back(&aliased);
any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled);
+ if (True(aliased_image.flags & ImageFlagBits::GpuModified)) {
+ image.flags |= ImageFlagBits::GpuModified;
+ }
}
}
if (aliased_images.empty()) {
@@ -1725,7 +1761,7 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
});
const auto& resolution = Settings::values.resolution_info;
for (const AliasedImage* const aliased : aliased_images) {
- if (!resolution.active | !any_rescaled) {
+ if (!resolution.active || !any_rescaled) {
CopyImage(image_id, aliased->id, aliased->copies);
continue;
}
@@ -1736,19 +1772,7 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
continue;
}
ScaleUp(aliased_image);
-
- const bool both_2d{image.info.type == ImageType::e2D &&
- aliased_image.info.type == ImageType::e2D};
- auto copies = aliased->copies;
- for (auto copy : copies) {
- copy.extent.width = std::max<u32>(
- (copy.extent.width * resolution.up_scale) >> resolution.down_shift, 1);
- if (both_2d) {
- copy.extent.height = std::max<u32>(
- (copy.extent.height * resolution.up_scale) >> resolution.down_shift, 1);
- }
- }
- CopyImage(image_id, aliased->id, copies);
+ CopyImage(image_id, aliased->id, aliased->copies);
}
}
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 7107887a6..b1324edf3 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -22,7 +22,6 @@
#include "video_core/texture_cache/image_base.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view_base.h"
-#include "video_core/texture_cache/image_view_info.h"
#include "video_core/texture_cache/render_targets.h"
#include "video_core/texture_cache/slot_vector.h"
#include "video_core/texture_cache/types.h"
@@ -60,8 +59,10 @@ class TextureCache {
/// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
- static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
- static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
+ static constexpr s64 TARGET_THRESHOLD = 4_GiB;
+ static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
+ static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
+ static constexpr size_t GC_EMERGENCY_COUNTS = 2;
using Runtime = typename P::Runtime;
using Image = typename P::Image;
@@ -373,6 +374,7 @@ private:
u64 minimum_memory;
u64 expected_memory;
u64 critical_memory;
+ size_t critical_gc;
SlotVector<Image> slot_images;
SlotVector<ImageMapView> slot_map_views;
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 7af52de2e..f13669ea5 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -9,10 +9,8 @@
#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/image_base.h"
-#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/types.h"
#include "video_core/textures/texture.h"
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 25161df1f..28e4beafd 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -16,6 +16,7 @@
// <http://gamma.cs.unc.edu/FasTC/>
#include <algorithm>
+#include <bit>
#include <cassert>
#include <cstring>
#include <span>
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index 14d2beec0..564ae1e36 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -4,9 +4,6 @@
#pragma once
-#include <bit>
-#include "common/common_types.h"
-
namespace Tegra::Texture::ASTC {
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 24e943e4c..6dae23049 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -6,7 +6,6 @@
#include <cmath>
#include <cstring>
#include <span>
-#include <utility>
#include "common/alignment.h"
#include "common/assert.h"
@@ -14,7 +13,6 @@
#include "common/div_ceil.h"
#include "video_core/gpu.h"
#include "video_core/textures/decoders.h"
-#include "video_core/textures/texture.h"
namespace Tegra::Texture {
namespace {
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index 06954963d..f56b4b9f9 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <algorithm>
#include <array>
#include "common/cityhash.h"
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 329bf4def..2f2594585 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -50,6 +50,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor
gpu->BindRenderer(std::move(renderer));
return gpu;
} catch (const std::runtime_error& exception) {
+ scope.Cancel();
LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
return nullptr;
}
diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
index eae1891dd..55c115081 100644
--- a/src/video_core/vulkan_common/nsight_aftermath_tracker.h
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
@@ -4,20 +4,22 @@
#pragma once
-#include <filesystem>
-#include <mutex>
#include <span>
-#include <string>
-#include <vector>
#include "common/common_types.h"
-#include "common/dynamic_library.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
#ifdef HAS_NSIGHT_AFTERMATH
+#include <filesystem>
+#include <mutex>
+
+// Vulkan headers must be included before Aftermath
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
#include <GFSDK_Aftermath_Defines.h>
#include <GFSDK_Aftermath_GpuCrashDump.h>
#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
+
+#include "common/dynamic_library.h"
#endif
namespace Vulkan {
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index effde73c9..bd05a1f84 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -6,19 +6,20 @@
#include <bitset>
#include <chrono>
#include <optional>
-#include <string_view>
#include <thread>
#include <unordered_set>
#include <utility>
#include <vector>
#include "common/assert.h"
+#include "common/literals.h"
#include "common/settings.h"
#include "video_core/vulkan_common/nsight_aftermath_tracker.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
+using namespace Common::Literals;
namespace {
namespace Alternatives {
constexpr std::array STENCIL8_UINT{
@@ -44,6 +45,12 @@ constexpr std::array B5G6R5_UNORM_PACK16{
VK_FORMAT_R5G6B5_UNORM_PACK16,
VK_FORMAT_UNDEFINED,
};
+
+constexpr std::array R4G4_UNORM_PACK8{ // returned by GetFormatAlternatives for VK_FORMAT_R4G4_UNORM_PACK8
+ VK_FORMAT_R8_UNORM,
+ VK_FORMAT_UNDEFINED, // NOTE(review): presumably the list terminator, as in the sibling arrays - confirm
+};
+
} // namespace Alternatives
enum class NvidiaArchitecture {
@@ -94,6 +101,8 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
return Alternatives::DEPTH16_UNORM_STENCIL8_UINT.data();
case VK_FORMAT_B5G6R5_UNORM_PACK16:
return Alternatives::B5G6R5_UNORM_PACK16.data();
+ case VK_FORMAT_R4G4_UNORM_PACK8:
+ return Alternatives::R4G4_UNORM_PACK8.data();
default:
return nullptr;
}
@@ -121,6 +130,8 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
VK_FORMAT_R5G6B5_UNORM_PACK16,
VK_FORMAT_B5G6R5_UNORM_PACK16,
+ VK_FORMAT_R5G5B5A1_UNORM_PACK16,
+ VK_FORMAT_B5G5R5A1_UNORM_PACK16,
VK_FORMAT_A2B10G10R10_UNORM_PACK32,
VK_FORMAT_A2B10G10R10_UINT_PACK32,
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
@@ -159,7 +170,9 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
VK_FORMAT_R16G16B16A16_SFLOAT,
VK_FORMAT_B8G8R8A8_UNORM,
VK_FORMAT_B8G8R8A8_SRGB,
+ VK_FORMAT_R4G4_UNORM_PACK8,
VK_FORMAT_R4G4B4A4_UNORM_PACK16,
+ VK_FORMAT_B4G4R4A4_UNORM_PACK16,
VK_FORMAT_D32_SFLOAT,
VK_FORMAT_D16_UNORM,
VK_FORMAT_S8_UINT,
@@ -597,6 +610,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
+ is_integrated = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
+ is_virtual = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU;
+ is_non_gpu = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER ||
+ properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU;
+
CollectPhysicalMemoryInfo();
CollectTelemetryParameters();
CollectToolingInfo();
@@ -615,6 +633,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
khr_push_descriptor = false;
break;
}
+ const u32 nv_major_version = (properties.driverVersion >> 22) & 0x3ff;
+ if (nv_major_version >= 510) {
+ LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits");
+ cant_blit_msaa = true;
+ }
}
const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV;
if (ext_extended_dynamic_state && is_radv) {
@@ -725,7 +748,7 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
}
void Device::ReportLoss() const {
- LOG_CRITICAL(Render_Vulkan, "Device loss occured!");
+ LOG_CRITICAL(Render_Vulkan, "Device loss occurred!");
// Wait for the log to flush and for Nsight Aftermath to dump the results
std::this_thread::sleep_for(std::chrono::seconds{15});
@@ -986,6 +1009,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME,
false);
test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false);
+ test(ext_memory_budget, VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, true);
if (Settings::values.enable_nsight_aftermath) {
test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
true);
@@ -998,7 +1022,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
VkPhysicalDeviceFeatures2KHR features{};
features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
- VkPhysicalDeviceProperties2KHR physical_properties;
+ VkPhysicalDeviceProperties2KHR physical_properties{};
physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
if (has_khr_shader_float16_int8) {
@@ -1268,15 +1292,50 @@ void Device::CollectTelemetryParameters() {
vendor_name = driver.driverName;
}
+u64 Device::GetDeviceMemoryUsage() const {
+ // Sums the heapUsage reported through VK_EXT_memory_budget over every heap in valid_heap_memory.
+ VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{}; // zeroed: pNext is null, usage reads as 0 if never filled
+ budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
+ physical.GetMemoryProperties(ext_memory_budget ? &budget : nullptr); // chain only when the extension is present
+ u64 result{};
+ for (const size_t heap : valid_heap_memory) {
+ result += budget.heapUsage[heap];
+ }
+ return result;
+}
+
void Device::CollectPhysicalMemoryInfo() {
- const auto mem_properties = physical.GetMemoryProperties();
+ VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{}; // only chained into the query below when ext_memory_budget is set
+ budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
+ const auto mem_info = physical.GetMemoryProperties(ext_memory_budget ? &budget : nullptr);
+ const auto& mem_properties = mem_info.memoryProperties;
const size_t num_properties = mem_properties.memoryHeapCount;
device_access_memory = 0;
+ u64 device_initial_usage = 0; // accumulated heapUsage; only non-zero when ext_memory_budget is set
+ u64 local_memory = 0; // total size of the DEVICE_LOCAL heaps that were accepted
for (size_t element = 0; element < num_properties; ++element) {
- if ((mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) {
- device_access_memory += mem_properties.memoryHeaps[element].size;
+ const bool is_heap_local =
+ (mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0;
+ if (!is_integrated && !is_heap_local) { // non-integrated devices count device-local heaps only
+ continue;
}
+ valid_heap_memory.push_back(element); // heap indices later summed by GetDeviceMemoryUsage()
+ if (is_heap_local) {
+ local_memory += mem_properties.memoryHeaps[element].size;
+ }
+ if (ext_memory_budget) { // use the driver-reported budget/usage instead of the raw heap size
+ device_initial_usage += budget.heapUsage[element];
+ device_access_memory += budget.heapBudget[element];
+ continue;
+ }
+ device_access_memory += mem_properties.memoryHeaps[element].size;
+ }
+ if (!is_integrated) { // the adjustment below applies to integrated GPUs only
+ return;
}
+ const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage);
+ device_access_memory = static_cast<u64>(std::max<s64>( // min(available - 8 GiB, 4 GiB), but never below local_memory
+ std::min<s64>(available_memory - 8_GiB, 4_GiB), static_cast<s64>(local_memory)));
}
void Device::CollectToolingInfo() {
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 34b1add16..2d709d069 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -6,7 +6,6 @@
#include <span>
#include <string>
-#include <string_view>
#include <unordered_map>
#include <vector>
@@ -342,6 +341,12 @@ public:
return device_access_memory;
}
+ bool CanReportMemoryUsage() const {
+ return ext_memory_budget; // flag documented in this class as "Support for VK_EXT_memory_budget"
+ }
+
+ u64 GetDeviceMemoryUsage() const;
+
u32 GetSetsPerPool() const {
return sets_per_pool;
}
@@ -422,6 +427,9 @@ private:
bool is_topology_list_restart_supported{}; ///< Support for primitive restart with list
///< topologies.
bool is_patch_list_restart_supported{}; ///< Support for primitive restart with list patch.
+ bool is_integrated{}; ///< Is GPU an iGPU.
+ bool is_virtual{}; ///< Is GPU a virtual GPU.
+ bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2.
bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough.
@@ -446,6 +454,7 @@ private:
bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64.
bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization.
bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex.
+ bool ext_memory_budget{}; ///< Support for VK_EXT_memory_budget.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit
bool has_renderdoc{}; ///< Has RenderDoc attached
@@ -457,6 +466,7 @@ private:
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions.
+ std::vector<size_t> valid_heap_memory; ///< Heaps used.
/// Format properties dictionary.
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
index bfd6e6add..662694f16 100644
--- a/src/video_core/vulkan_common/vulkan_instance.cpp
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -2,11 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <algorithm>
#include <future>
#include <optional>
#include <span>
-#include <utility>
#include <vector>
#include "common/common_types.h"
diff --git a/src/video_core/vulkan_common/vulkan_library.cpp b/src/video_core/vulkan_common/vulkan_library.cpp
index 22833fa56..d69de05ef 100644
--- a/src/video_core/vulkan_common/vulkan_library.cpp
+++ b/src/video_core/vulkan_common/vulkan_library.cpp
@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <cstdlib>
#include <string>
#include "common/dynamic_library.h"
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index 300a61205..e6e97b332 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -227,7 +227,7 @@ void MemoryCommit::Release() {
}
MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_)
- : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()},
+ : device{device_}, properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
export_allocations{export_allocations_},
buffer_image_granularity{
device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {}
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index 86e8ed119..338daf5ba 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -6,7 +6,6 @@
#include <memory>
#include <span>
-#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index a9faa4807..742cc39da 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -3,10 +3,8 @@
// Refer to the license.txt file included.
#include <algorithm>
-#include <exception>
#include <memory>
#include <optional>
-#include <string_view>
#include <utility>
#include <vector>
@@ -239,8 +237,8 @@ bool Load(VkInstance instance, InstanceDispatch& dld) noexcept {
return X(vkCreateDevice) && X(vkDestroyDevice) && X(vkDestroyDevice) &&
X(vkEnumerateDeviceExtensionProperties) && X(vkEnumeratePhysicalDevices) &&
X(vkGetDeviceProcAddr) && X(vkGetPhysicalDeviceFormatProperties) &&
- X(vkGetPhysicalDeviceMemoryProperties) && X(vkGetPhysicalDeviceProperties) &&
- X(vkGetPhysicalDeviceQueueFamilyProperties);
+ X(vkGetPhysicalDeviceMemoryProperties) && X(vkGetPhysicalDeviceMemoryProperties2) &&
+ X(vkGetPhysicalDeviceProperties) && X(vkGetPhysicalDeviceQueueFamilyProperties);
#undef X
}
@@ -928,9 +926,12 @@ std::vector<VkPresentModeKHR> PhysicalDevice::GetSurfacePresentModesKHR(
return modes;
}
-VkPhysicalDeviceMemoryProperties PhysicalDevice::GetMemoryProperties() const noexcept {
- VkPhysicalDeviceMemoryProperties properties;
- dld->vkGetPhysicalDeviceMemoryProperties(physical_device, &properties);
+VkPhysicalDeviceMemoryProperties2 PhysicalDevice::GetMemoryProperties(
+ void* next_structures) const noexcept { // next_structures: optional pNext chain, declared with a nullptr default
+ VkPhysicalDeviceMemoryProperties2 properties{};
+ properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
+ properties.pNext = next_structures; // forwarded unchanged to the driver query
+ dld->vkGetPhysicalDeviceMemoryProperties2(physical_device, &properties); // callers read .memoryProperties from the result
return properties;
}
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index b7ae01c6c..0a5f9931c 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -5,7 +5,6 @@
#pragma once
#include <exception>
-#include <iterator>
#include <limits>
#include <memory>
#include <optional>
@@ -173,6 +172,7 @@ struct InstanceDispatch {
PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR{};
PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties{};
PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties{};
+ PFN_vkGetPhysicalDeviceMemoryProperties2 vkGetPhysicalDeviceMemoryProperties2{};
PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties{};
PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR{};
PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties{};
@@ -951,7 +951,8 @@ public:
std::vector<VkPresentModeKHR> GetSurfacePresentModesKHR(VkSurfaceKHR) const;
- VkPhysicalDeviceMemoryProperties GetMemoryProperties() const noexcept;
+ VkPhysicalDeviceMemoryProperties2 GetMemoryProperties(
+ void* next_structures = nullptr) const noexcept;
private:
VkPhysicalDevice physical_device = nullptr;