summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h11
-rw-r--r--src/video_core/dma_pusher.cpp19
-rw-r--r--src/video_core/dma_pusher.h8
-rw-r--r--src/video_core/engines/draw_manager.cpp58
-rw-r--r--src/video_core/engines/draw_manager.h4
-rw-r--r--src/video_core/engines/engine_upload.cpp4
-rw-r--r--src/video_core/engines/engine_upload.h7
-rw-r--r--src/video_core/engines/maxwell_dma.cpp34
-rw-r--r--src/video_core/engines/maxwell_dma.h8
-rw-r--r--src/video_core/gpu.cpp5
-rw-r--r--src/video_core/host1x/vic.cpp6
-rw-r--r--src/video_core/host1x/vic.h7
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp10
-rw-r--r--src/video_core/renderer_opengl/gl_device.h8
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp77
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h1
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp4
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp14
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp28
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp10
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp15
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.h14
-rw-r--r--src/video_core/texture_cache/texture_cache.h35
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h6
-rw-r--r--src/video_core/texture_cache/util.cpp20
-rw-r--r--src/video_core/texture_cache/util.h5
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp32
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h16
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.cpp12
32 files changed, 316 insertions, 174 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 158360830..f1c60d1f3 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -20,6 +20,7 @@
#include "common/lru_cache.h"
#include "common/microprofile.h"
#include "common/polyfill_ranges.h"
+#include "common/scratch_buffer.h"
#include "common/settings.h"
#include "core/memory.h"
#include "video_core/buffer_cache/buffer_base.h"
@@ -422,8 +423,7 @@ private:
IntervalSet common_ranges;
std::deque<IntervalSet> committed_ranges;
- size_t immediate_buffer_capacity = 0;
- std::unique_ptr<u8[]> immediate_buffer_alloc;
+ Common::ScratchBuffer<u8> immediate_buffer_alloc;
struct LRUItemParams {
using ObjectType = BufferId;
@@ -1927,11 +1927,8 @@ std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size
template <class P>
std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
- if (wanted_capacity > immediate_buffer_capacity) {
- immediate_buffer_capacity = wanted_capacity;
- immediate_buffer_alloc = std::make_unique<u8[]>(wanted_capacity);
- }
- return std::span<u8>(immediate_buffer_alloc.get(), wanted_capacity);
+ immediate_buffer_alloc.resize_destructive(wanted_capacity);
+ return std::span<u8>(immediate_buffer_alloc.data(), wanted_capacity);
}
template <class P>
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 9835e3ac1..322de2606 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -56,7 +56,7 @@ bool DmaPusher::Step() {
if (command_list.prefetch_command_list.size()) {
// Prefetched command list from nvdrv, used for things like synchronization
- command_headers = std::move(command_list.prefetch_command_list);
+ ProcessCommands(command_list.prefetch_command_list);
dma_pushbuffer.pop();
} else {
const CommandListHeader command_list_header{
@@ -74,7 +74,7 @@ bool DmaPusher::Step() {
}
// Push buffer non-empty, read a word
- command_headers.resize(command_list_header.size);
+ command_headers.resize_destructive(command_list_header.size);
if (Settings::IsGPULevelHigh()) {
memory_manager.ReadBlock(dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
@@ -82,16 +82,21 @@ bool DmaPusher::Step() {
memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
}
+ ProcessCommands(command_headers);
}
- for (std::size_t index = 0; index < command_headers.size();) {
- const CommandHeader& command_header = command_headers[index];
+
+ return true;
+}
+
+void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
+ for (std::size_t index = 0; index < commands.size();) {
+ const CommandHeader& command_header = commands[index];
if (dma_state.method_count) {
// Data word of methods command
if (dma_state.non_incrementing) {
const u32 max_write = static_cast<u32>(
- std::min<std::size_t>(index + dma_state.method_count, command_headers.size()) -
- index);
+ std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index);
CallMultiMethod(&command_header.argument, max_write);
dma_state.method_count -= max_write;
dma_state.is_last_call = true;
@@ -142,8 +147,6 @@ bool DmaPusher::Step() {
}
index++;
}
-
- return true;
}
void DmaPusher::SetState(const CommandHeader& command_header) {
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 938f0f11c..6f00de937 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -4,11 +4,13 @@
#pragma once
#include <array>
+#include <span>
#include <vector>
#include <queue>
#include "common/bit_field.h"
#include "common/common_types.h"
+#include "common/scratch_buffer.h"
#include "video_core/engines/engine_interface.h"
#include "video_core/engines/puller.h"
@@ -136,13 +138,15 @@ private:
static constexpr u32 non_puller_methods = 0x40;
static constexpr u32 max_subchannels = 8;
bool Step();
+ void ProcessCommands(std::span<const CommandHeader> commands);
void SetState(const CommandHeader& command_header);
void CallMethod(u32 argument) const;
void CallMultiMethod(const u32* base_start, u32 num_methods) const;
- std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
+ Common::ScratchBuffer<CommandHeader>
+ command_headers; ///< Buffer for list of commands fetched at once
std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer
@@ -159,7 +163,7 @@ private:
DmaState dma_state{};
bool dma_increment_once{};
- bool ib_enable{true}; ///< IB mode enabled
+ const bool ib_enable{true}; ///< IB mode enabled
std::array<Engines::EngineInterface*, max_subchannels> subchannels{};
diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp
index b213c374f..3a78421f6 100644
--- a/src/video_core/engines/draw_manager.cpp
+++ b/src/video_core/engines/draw_manager.cpp
@@ -46,21 +46,26 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) {
SetInlineIndexBuffer(regs.inline_index_4x8.index2);
SetInlineIndexBuffer(regs.inline_index_4x8.index3);
break;
- case MAXWELL3D_REG_INDEX(topology_override):
- use_topology_override = true;
+ case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
+ case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): {
+ LOG_WARNING(HW_GPU, "(STUBBED) called");
break;
+ }
default:
break;
}
}
void DrawManager::Clear(u32 layer_count) {
- maxwell3d->rasterizer->Clear(layer_count);
+ if (maxwell3d->ShouldExecute()) {
+ maxwell3d->rasterizer->Clear(layer_count);
+ }
}
void DrawManager::DrawDeferred() {
- if (draw_state.draw_mode != DrawMode::Instance || draw_state.instance_count == 0)
+ if (draw_state.draw_mode != DrawMode::Instance || draw_state.instance_count == 0) {
return;
+ }
DrawEnd(draw_state.instance_count + 1, true);
draw_state.instance_count = 0;
}
@@ -115,8 +120,9 @@ void DrawManager::DrawEnd(u32 instance_count, bool force_draw) {
const auto& regs{maxwell3d->regs};
switch (draw_state.draw_mode) {
case DrawMode::Instance:
- if (!force_draw)
+ if (!force_draw) {
break;
+ }
[[fallthrough]];
case DrawMode::General:
draw_state.base_instance = regs.global_base_instance_index;
@@ -156,25 +162,28 @@ void DrawManager::DrawIndexSmall(u32 argument) {
ProcessDraw(true, 1);
}
-void DrawManager::ProcessTopologyOverride() {
- if (!use_topology_override)
- return;
-
+void DrawManager::UpdateTopology() {
const auto& regs{maxwell3d->regs};
- switch (regs.topology_override) {
- case PrimitiveTopologyOverride::None:
- break;
- case PrimitiveTopologyOverride::Points:
- draw_state.topology = PrimitiveTopology::Points;
- break;
- case PrimitiveTopologyOverride::Lines:
- draw_state.topology = PrimitiveTopology::Lines;
- break;
- case PrimitiveTopologyOverride::LineStrip:
- draw_state.topology = PrimitiveTopology::LineStrip;
+ switch (regs.primitive_topology_control) {
+ case PrimitiveTopologyControl::UseInBeginMethods:
break;
- default:
- draw_state.topology = static_cast<PrimitiveTopology>(regs.topology_override);
+ case PrimitiveTopologyControl::UseSeparateState:
+ switch (regs.topology_override) {
+ case PrimitiveTopologyOverride::None:
+ break;
+ case PrimitiveTopologyOverride::Points:
+ draw_state.topology = PrimitiveTopology::Points;
+ break;
+ case PrimitiveTopologyOverride::Lines:
+ draw_state.topology = PrimitiveTopology::Lines;
+ break;
+ case PrimitiveTopologyOverride::LineStrip:
+ draw_state.topology = PrimitiveTopology::LineStrip;
+ break;
+ default:
+ draw_state.topology = static_cast<PrimitiveTopology>(regs.topology_override);
+ break;
+ }
break;
}
}
@@ -183,9 +192,10 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology,
draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count);
- ProcessTopologyOverride();
+ UpdateTopology();
- if (maxwell3d->ShouldExecute())
+ if (maxwell3d->ShouldExecute()) {
maxwell3d->rasterizer->Draw(draw_indexed, instance_count);
+ }
}
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h
index 4f67027ca..0e6930a9c 100644
--- a/src/video_core/engines/draw_manager.h
+++ b/src/video_core/engines/draw_manager.h
@@ -10,6 +10,7 @@ class RasterizerInterface;
}
namespace Tegra::Engines {
+using PrimitiveTopologyControl = Maxwell3D::Regs::PrimitiveTopologyControl;
using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology;
using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride;
using IndexBuffer = Maxwell3D::Regs::IndexBuffer;
@@ -58,12 +59,11 @@ private:
void DrawIndexSmall(u32 argument);
- void ProcessTopologyOverride();
+ void UpdateTopology();
void ProcessDraw(bool draw_indexed, u32 instance_count);
Maxwell3D* maxwell3d{};
State draw_state{};
- bool use_topology_override{};
};
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index e4f8331ab..cea1dd8b0 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -24,7 +24,7 @@ void State::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
void State::ProcessExec(const bool is_linear_) {
write_offset = 0;
copy_size = regs.line_length_in * regs.line_count;
- inner_buffer.resize(copy_size);
+ inner_buffer.resize_destructive(copy_size);
is_linear = is_linear_;
}
@@ -70,7 +70,7 @@ void State::ProcessData(std::span<const u8> read_buffer) {
const std::size_t dst_size = Tegra::Texture::CalculateSize(
true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth,
regs.dest.BlockHeight(), regs.dest.BlockDepth());
- tmp_buffer.resize(dst_size);
+ tmp_buffer.resize_destructive(dst_size);
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width,
regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index 94fafd9dc..7242d2529 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -4,9 +4,10 @@
#pragma once
#include <span>
-#include <vector>
+
#include "common/bit_field.h"
#include "common/common_types.h"
+#include "common/scratch_buffer.h"
namespace Tegra {
class MemoryManager;
@@ -73,8 +74,8 @@ private:
u32 write_offset = 0;
u32 copy_size = 0;
- std::vector<u8> inner_buffer;
- std::vector<u8> tmp_buffer;
+ Common::ScratchBuffer<u8> inner_buffer;
+ Common::ScratchBuffer<u8> tmp_buffer;
bool is_linear = false;
Registers& regs;
MemoryManager& memory_manager;
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index a189e60ae..f73d7bf0f 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -184,12 +184,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
const size_t src_size =
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
- if (read_buffer.size() < src_size) {
- read_buffer.resize(src_size);
- }
- if (write_buffer.size() < dst_size) {
- write_buffer.resize(dst_size);
- }
+ read_buffer.resize_destructive(src_size);
+ write_buffer.resize_destructive(dst_size);
memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
@@ -235,12 +231,8 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count;
- if (read_buffer.size() < src_size) {
- read_buffer.resize(src_size);
- }
- if (write_buffer.size() < dst_size) {
- write_buffer.resize(dst_size);
- }
+ read_buffer.resize_destructive(src_size);
+ write_buffer.resize_destructive(dst_size);
memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
if (Settings::IsGPULevelExtreme()) {
@@ -269,12 +261,8 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
pos_x = pos_x % x_in_gob;
pos_y = pos_y % 8;
- if (read_buffer.size() < src_size) {
- read_buffer.resize(src_size);
- }
- if (write_buffer.size() < dst_size) {
- write_buffer.resize(dst_size);
- }
+ read_buffer.resize_destructive(src_size);
+ write_buffer.resize_destructive(dst_size);
if (Settings::IsGPULevelExtreme()) {
memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size);
@@ -333,14 +321,10 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
const u32 pitch = x_elements * bytes_per_pixel;
const size_t mid_buffer_size = pitch * regs.line_count;
- if (read_buffer.size() < src_size) {
- read_buffer.resize(src_size);
- }
- if (write_buffer.size() < dst_size) {
- write_buffer.resize(dst_size);
- }
+ read_buffer.resize_destructive(src_size);
+ write_buffer.resize_destructive(dst_size);
- intermediate_buffer.resize(mid_buffer_size);
+ intermediate_buffer.resize_destructive(mid_buffer_size);
memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index d40d3d302..c88191a61 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -6,8 +6,10 @@
#include <array>
#include <cstddef>
#include <vector>
+
#include "common/bit_field.h"
#include "common/common_types.h"
+#include "common/scratch_buffer.h"
#include "video_core/engines/engine_interface.h"
namespace Core {
@@ -234,9 +236,9 @@ private:
MemoryManager& memory_manager;
VideoCore::RasterizerInterface* rasterizer = nullptr;
- std::vector<u8> read_buffer;
- std::vector<u8> write_buffer;
- std::vector<u8> intermediate_buffer;
+ Common::ScratchBuffer<u8> read_buffer;
+ Common::ScratchBuffer<u8> write_buffer;
+ Common::ScratchBuffer<u8> intermediate_buffer;
static constexpr std::size_t NUM_REGS = 0x800;
struct Regs {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 28b38273e..c6d54be63 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -223,8 +223,6 @@ struct GPU::Impl {
/// core timing events.
void Start() {
gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler);
- cpu_context = renderer->GetRenderWindow().CreateSharedContext();
- cpu_context->MakeCurrent();
}
void NotifyShutdown() {
@@ -235,6 +233,9 @@ struct GPU::Impl {
/// Obtain the CPU Context
void ObtainContext() {
+ if (!cpu_context) {
+ cpu_context = renderer->GetRenderWindow().CreateSharedContext();
+ }
cpu_context->MakeCurrent();
}
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp
index ac0b7d20e..36a04e4e0 100644
--- a/src/video_core/host1x/vic.cpp
+++ b/src/video_core/host1x/vic.cpp
@@ -155,7 +155,7 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
// swizzle pitch linear to block linear
const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
- luma_buffer.resize(size);
+ luma_buffer.resize_destructive(size);
std::span<const u8> frame_buff(converted_frame_buf_addr, 4 * width * height);
Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, 0, 0, width, height,
block_height, 0, width * 4);
@@ -181,8 +181,8 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
const auto stride = static_cast<size_t>(frame->linesize[0]);
- luma_buffer.resize(aligned_width * surface_height);
- chroma_buffer.resize(aligned_width * surface_height / 2);
+ luma_buffer.resize_destructive(aligned_width * surface_height);
+ chroma_buffer.resize_destructive(aligned_width * surface_height / 2);
// Populate luma buffer
const u8* luma_src = frame->data[0];
diff --git a/src/video_core/host1x/vic.h b/src/video_core/host1x/vic.h
index 2b78786e8..3d9753047 100644
--- a/src/video_core/host1x/vic.h
+++ b/src/video_core/host1x/vic.h
@@ -4,8 +4,9 @@
#pragma once
#include <memory>
-#include <vector>
+
#include "common/common_types.h"
+#include "common/scratch_buffer.h"
struct SwsContext;
@@ -49,8 +50,8 @@ private:
/// size does not change during a stream
using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
AVMallocPtr converted_frame_buffer;
- std::vector<u8> luma_buffer;
- std::vector<u8> chroma_buffer;
+ Common::ScratchBuffer<u8> luma_buffer;
+ Common::ScratchBuffer<u8> chroma_buffer;
GPUVAddr config_struct_address{};
GPUVAddr output_surface_luma_address{};
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index e2e3dac34..cee5c3247 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -112,7 +112,7 @@ bool IsASTCSupported() {
}
} // Anonymous namespace
-Device::Device() {
+Device::Device(Core::Frontend::EmuWindow& emu_window) {
if (!GLAD_GL_VERSION_4_6) {
LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
throw std::runtime_error{"Insufficient version"};
@@ -126,9 +126,9 @@ Device::Device() {
const bool is_intel = vendor_name == "Intel";
#ifdef __unix__
- const bool is_linux = true;
+ constexpr bool is_linux = true;
#else
- const bool is_linux = false;
+ constexpr bool is_linux = false;
#endif
bool disable_fast_buffer_sub_data = false;
@@ -193,9 +193,11 @@ Device::Device() {
}
}
+ strict_context_required = emu_window.StrictContextRequired();
// Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
+ // Blocks EGL on Wayland from using asynchronous shader compilation.
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
- !(is_amd || (is_intel && !is_linux));
+ !(is_amd || (is_intel && !is_linux)) && !strict_context_required;
use_driver_cache = is_nvidia;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 5ef51ebcf..2a72d84be 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -5,6 +5,7 @@
#include <cstddef>
#include "common/common_types.h"
+#include "core/frontend/emu_window.h"
#include "shader_recompiler/stage.h"
namespace Settings {
@@ -15,7 +16,7 @@ namespace OpenGL {
class Device {
public:
- explicit Device();
+ explicit Device(Core::Frontend::EmuWindow& emu_window);
[[nodiscard]] std::string GetVendorName() const;
@@ -173,6 +174,10 @@ public:
return can_report_memory;
}
+ bool StrictContextRequired() const {
+ return strict_context_required;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -216,6 +221,7 @@ private:
bool has_cbuf_ftou_bug{};
bool has_bool_ref_bug{};
bool can_report_memory{};
+ bool strict_context_required{};
std::string vendor_name;
};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 64ed6f628..a44b8c454 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -138,9 +138,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_load
void RasterizerOpenGL::Clear(u32 layer_count) {
MICROPROFILE_SCOPE(OpenGL_Clears);
- if (!maxwell3d->ShouldExecute()) {
- return;
- }
const auto& regs = maxwell3d->regs;
bool use_color{};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index a59d0d24e..f8868a012 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -174,6 +174,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_},
state_tracker{state_tracker_}, shader_notify{shader_notify_},
use_asynchronous_shaders{device.UseAsynchronousShaders()},
+ strict_context_required{device.StrictContextRequired()},
profile{
.supported_spirv = 0x00010000,
@@ -203,6 +204,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.support_int64_atomics = false,
.support_derivative_control = device.HasDerivativeControl(),
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
+ .support_native_ndc = true,
.support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(),
.support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(),
.support_gl_texture_shadow_lod = device.HasTextureShadowLod(),
@@ -255,9 +257,14 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
}
shader_cache_filename = base_dir / "opengl.bin";
- if (!workers) {
+ if (!workers && !strict_context_required) {
workers = CreateWorkers();
}
+ std::optional<Context> strict_context;
+ if (strict_context_required) {
+ strict_context.emplace(emu_window);
+ }
+
struct {
std::mutex mutex;
size_t total{};
@@ -265,44 +272,49 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
bool has_loaded{};
} state;
+ const auto queue_work{[&](Common::UniqueFunction<void, Context*>&& work) {
+ if (strict_context_required) {
+ work(&strict_context.value());
+ } else {
+ workers->QueueWork(std::move(work));
+ }
+ }};
const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
ComputePipelineKey key;
file.read(reinterpret_cast<char*>(&key), sizeof(key));
- workers->QueueWork(
- [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable {
- ctx->pools.ReleaseContents();
- auto pipeline{CreateComputePipeline(ctx->pools, key, env)};
- std::scoped_lock lock{state.mutex};
- if (pipeline) {
- compute_cache.emplace(key, std::move(pipeline));
- }
- ++state.built;
- if (state.has_loaded) {
- callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
- }
- });
+ queue_work([this, key, env = std::move(env), &state, &callback](Context* ctx) mutable {
+ ctx->pools.ReleaseContents();
+ auto pipeline{CreateComputePipeline(ctx->pools, key, env)};
+ std::scoped_lock lock{state.mutex};
+ if (pipeline) {
+ compute_cache.emplace(key, std::move(pipeline));
+ }
+ ++state.built;
+ if (state.has_loaded) {
+ callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+ }
+ });
++state.total;
}};
const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
GraphicsPipelineKey key;
file.read(reinterpret_cast<char*>(&key), sizeof(key));
- workers->QueueWork(
- [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable {
- boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
- for (auto& env : envs) {
- env_ptrs.push_back(&env);
- }
- ctx->pools.ReleaseContents();
- auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)};
- std::scoped_lock lock{state.mutex};
- if (pipeline) {
- graphics_cache.emplace(key, std::move(pipeline));
- }
- ++state.built;
- if (state.has_loaded) {
- callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
- }
- });
+ queue_work([this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable {
+ boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
+ for (auto& env : envs) {
+ env_ptrs.push_back(&env);
+ }
+ ctx->pools.ReleaseContents();
+ auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)};
+ std::scoped_lock lock{state.mutex};
+ if (pipeline) {
+ graphics_cache.emplace(key, std::move(pipeline));
+ }
+ ++state.built;
+ if (state.has_loaded) {
+ callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+ }
+ });
++state.total;
}};
LoadPipelines(stop_loading, shader_cache_filename, CACHE_VERSION, load_compute, load_graphics);
@@ -314,6 +326,9 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
state.has_loaded = true;
lock.unlock();
+ if (strict_context_required) {
+ return;
+ }
workers->WaitForRequests(stop_loading);
if (!use_asynchronous_shaders) {
workers.reset();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 53ffea904..f82420592 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -69,6 +69,7 @@ private:
StateTracker& state_tracker;
VideoCore::ShaderNotify& shader_notify;
const bool use_asynchronous_shaders;
+ const bool strict_context_required;
GraphicsPipelineKey graphics_key{};
GraphicsPipeline* current_pipeline{};
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 5b5e178ad..bc75680f0 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -140,8 +140,8 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
: RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
- emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{},
- program_manager{device},
+ emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, device{emu_window_},
+ state_tracker{}, program_manager{device},
rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) {
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 18be54729..f502a7d09 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -139,23 +139,25 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
RenderScreenshot(*framebuffer, use_accelerated);
bool has_been_recreated = false;
- const auto recreate_swapchain = [&] {
+ const auto recreate_swapchain = [&](u32 width, u32 height) {
if (!has_been_recreated) {
has_been_recreated = true;
scheduler.Finish();
}
- const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
- swapchain.Create(layout.width, layout.height, is_srgb);
+ swapchain.Create(width, height, is_srgb);
};
- if (swapchain.NeedsRecreation(is_srgb)) {
- recreate_swapchain();
+
+ const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
+ if (swapchain.NeedsRecreation(is_srgb) || swapchain.GetWidth() != layout.width ||
+ swapchain.GetHeight() != layout.height) {
+ recreate_swapchain(layout.width, layout.height);
}
bool is_outdated;
do {
swapchain.AcquireNextImage();
is_outdated = swapchain.IsOutDated();
if (is_outdated) {
- recreate_swapchain();
+ recreate_swapchain(layout.width, layout.height);
}
} while (is_outdated);
if (has_been_recreated) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 54a12b35f..6b54d7111 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -461,6 +461,9 @@ void BufferCacheRuntime::BindQuadIndexBuffer(PrimitiveTopology topology, u32 fir
void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
u32 stride) {
+ if (index >= device.GetMaxVertexInputBindings()) {
+ return;
+ }
if (device.IsExtExtendedDynamicStateSupported()) {
scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
const VkDeviceSize vk_offset = buffer != VK_NULL_HANDLE ? offset : 0;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 006128638..515d8d869 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -529,7 +529,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes;
if (key.state.dynamic_vertex_input) {
- for (size_t index = 0; index < key.state.attributes.size(); ++index) {
+ const size_t num_vertex_arrays = std::min(
+ key.state.attributes.size(), static_cast<size_t>(device.GetMaxVertexInputBindings()));
+ for (size_t index = 0; index < num_vertex_arrays; ++index) {
const u32 type = key.state.DynamicAttributeType(index);
if (!stage_infos[0].loads.Generic(index) || type == 0) {
continue;
@@ -551,7 +553,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
});
}
} else {
- for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ const size_t num_vertex_arrays = std::min(
+ Maxwell::NumVertexArrays, static_cast<size_t>(device.GetMaxVertexInputBindings()));
+ for (size_t index = 0; index < num_vertex_arrays; ++index) {
const bool instanced = key.state.binding_divisors[index] != 0;
const auto rate =
instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
@@ -580,6 +584,8 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
});
}
}
+ ASSERT(vertex_attributes.size() <= device.GetMaxVertexInputAttributes());
+
VkPipelineVertexInputStateCreateInfo vertex_input_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pNext = nullptr,
@@ -634,23 +640,33 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
};
std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
- const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
+ VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
.pNext = nullptr,
.flags = 0,
.viewportCount = Maxwell::NumViewports,
.pViewportSwizzles = swizzles.data(),
};
- const VkPipelineViewportStateCreateInfo viewport_ci{
+ VkPipelineViewportDepthClipControlCreateInfoEXT ndc_info{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .negativeOneToOne = key.state.ndc_minus_one_to_one.Value() != 0 ? VK_TRUE : VK_FALSE,
+ };
+ VkPipelineViewportStateCreateInfo viewport_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .pNext = device.IsNvViewportSwizzleSupported() ? &swizzle_ci : nullptr,
+ .pNext = nullptr,
.flags = 0,
.viewportCount = Maxwell::NumViewports,
.pViewports = nullptr,
.scissorCount = Maxwell::NumViewports,
.pScissors = nullptr,
};
-
+ if (device.IsNvViewportSwizzleSupported()) {
+ swizzle_ci.pNext = std::exchange(viewport_ci.pNext, &swizzle_ci);
+ }
+ if (device.IsExtDepthClipControlSupported()) {
+ ndc_info.pNext = std::exchange(viewport_ci.pNext, &ndc_info);
+ }
VkPipelineRasterizationStateCreateInfo rasterization_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 81f5f3e11..e7262420c 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -321,6 +321,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.support_int64_atomics = device.IsExtShaderAtomicInt64Supported(),
.support_derivative_control = true,
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
+ .support_native_ndc = device.IsExtDepthClipControlSupported(),
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
@@ -341,6 +342,15 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.support_snorm_render_buffer = true,
.support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
};
+
+ if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
+ LOG_WARNING(Render_Vulkan, "maxVertexInputAttributes is too low: {} < {}",
+ device.GetMaxVertexInputAttributes(), Maxwell::NumVertexAttributes);
+ }
+ if (device.GetMaxVertexInputBindings() < Maxwell::NumVertexArrays) {
+ LOG_WARNING(Render_Vulkan, "maxVertexInputBindings is too low: {} < {}",
+ device.GetMaxVertexInputBindings(), Maxwell::NumVertexArrays);
+ }
}
PipelineCache::~PipelineCache() = default;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 3774f303a..ac1eb9895 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -220,9 +220,6 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
void RasterizerVulkan::Clear(u32 layer_count) {
MICROPROFILE_SCOPE(Vulkan_Clearing);
- if (!maxwell3d->ShouldExecute()) {
- return;
- }
FlushWork();
query_cache.UpdateCounters();
@@ -665,8 +662,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
return;
}
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) ||
- regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation) ||
- regs.IsShaderConfigEnabled(Maxwell::ShaderType::Geometry));
+ regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation));
scheduler.Record(
[](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); });
}
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index d7be417f5..b6810eef9 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -67,17 +67,19 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
} // Anonymous namespace
-Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_, u32 width,
- u32 height, bool srgb)
+Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_,
+ u32 width_, u32 height_, bool srgb)
: surface{surface_}, device{device_}, scheduler{scheduler_} {
- Create(width, height, srgb);
+ Create(width_, height_, srgb);
}
Swapchain::~Swapchain() = default;
-void Swapchain::Create(u32 width, u32 height, bool srgb) {
+void Swapchain::Create(u32 width_, u32 height_, bool srgb) {
is_outdated = false;
is_suboptimal = false;
+ width = width_;
+ height = height_;
const auto physical_device = device.GetPhysical();
const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)};
@@ -88,7 +90,7 @@ void Swapchain::Create(u32 width, u32 height, bool srgb) {
device.GetLogical().WaitIdle();
Destroy();
- CreateSwapchain(capabilities, width, height, srgb);
+ CreateSwapchain(capabilities, srgb);
CreateSemaphores();
CreateImageViews();
@@ -148,8 +150,7 @@ void Swapchain::Present(VkSemaphore render_semaphore) {
}
}
-void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
- bool srgb) {
+void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb) {
const auto physical_device{device.GetPhysical()};
const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index 111b3902d..caf1ff32b 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -80,9 +80,16 @@ public:
return *present_semaphores[frame_index];
}
+ u32 GetWidth() const {
+ return width;
+ }
+
+ u32 GetHeight() const {
+ return height;
+ }
+
private:
- void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
- bool srgb);
+ void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb);
void CreateSemaphores();
void CreateImageViews();
@@ -105,6 +112,9 @@ private:
std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> present_semaphores;
+ u32 width;
+ u32 height;
+
u32 image_index{};
u32 frame_index{};
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 8e68a2e53..27c82cd20 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -39,6 +39,12 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
sampler_descriptor.cubemap_anisotropy.Assign(1);
+ // These values were chosen based on typical peak swizzle data sizes seen in some titles
+ static constexpr size_t SWIZZLE_DATA_BUFFER_INITIAL_CAPACITY = 8_MiB;
+ static constexpr size_t UNSWIZZLE_DATA_BUFFER_INITIAL_CAPACITY = 1_MiB;
+ swizzle_data_buffer.resize_destructive(SWIZZLE_DATA_BUFFER_INITIAL_CAPACITY);
+ unswizzle_data_buffer.resize_destructive(UNSWIZZLE_DATA_BUFFER_INITIAL_CAPACITY);
+
// Make sure the first index is reserved for the null resources
// This way the null resource becomes a compile time constant
void(slot_images.insert(NullImageParams{}));
@@ -90,7 +96,8 @@ void TextureCache<P>::RunGarbageCollector() {
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
runtime.Finish();
- SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+ SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
+ swizzle_data_buffer);
}
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
@@ -461,7 +468,8 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
runtime.Finish();
- SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+ SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
+ swizzle_data_buffer);
}
}
@@ -672,7 +680,8 @@ void TextureCache<P>::PopAsyncFlushes() {
for (const ImageId image_id : download_ids) {
const ImageBase& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
- SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span);
+ SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
+ swizzle_data_buffer);
download_map.offset += image.unswizzled_size_bytes;
download_span = download_span.subspan(image.unswizzled_size_bytes);
}
@@ -734,13 +743,21 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
const auto uploads = FullUploadSwizzles(image.info);
runtime.AccelerateImageUpload(image, staging, uploads);
- } else if (True(image.flags & ImageFlagBits::Converted)) {
- std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
- auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data);
- ConvertImage(unswizzled_data, image.info, mapped_span, copies);
+ return;
+ }
+ const size_t guest_size_bytes = image.guest_size_bytes;
+ swizzle_data_buffer.resize_destructive(guest_size_bytes);
+ gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes);
+
+ if (True(image.flags & ImageFlagBits::Converted)) {
+ unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes);
+ auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer,
+ unswizzle_data_buffer);
+ ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies);
image.UploadMemory(staging, copies);
} else {
- const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span);
+ const auto copies =
+ UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span);
image.UploadMemory(staging, copies);
}
}
@@ -910,7 +927,7 @@ void TextureCache<P>::InvalidateScale(Image& image) {
}
template <class P>
-u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) {
+u64 TextureCache<P>::GetScaledImageSizeBytes(const ImageBase& image) {
const u64 scale_up = static_cast<u64>(Settings::values.resolution_info.up_scale *
Settings::values.resolution_info.up_scale);
const u64 down_shift = static_cast<u64>(Settings::values.resolution_info.down_shift +
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 587339a31..4fd677a80 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -17,6 +17,7 @@
#include "common/literals.h"
#include "common/lru_cache.h"
#include "common/polyfill_ranges.h"
+#include "common/scratch_buffer.h"
#include "video_core/compatible_formats.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/delayed_destruction_ring.h"
@@ -368,7 +369,7 @@ private:
void InvalidateScale(Image& image);
bool ScaleUp(Image& image);
bool ScaleDown(Image& image);
- u64 GetScaledImageSizeBytes(ImageBase& image);
+ u64 GetScaledImageSizeBytes(const ImageBase& image);
Runtime& runtime;
@@ -417,6 +418,9 @@ private:
std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
+ Common::ScratchBuffer<u8> swizzle_data_buffer;
+ Common::ScratchBuffer<u8> unswizzle_data_buffer;
+
u64 modification_tick = 0;
u64 frame_tick = 0;
};
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index e8c908b42..03acc68d9 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -505,7 +505,7 @@ void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
const ImageInfo& info, const BufferImageCopy& copy,
- std::span<const u8> input) {
+ std::span<const u8> input, Common::ScratchBuffer<u8>& tmp_buffer) {
const Extent3D size = info.size;
const LevelInfo level_info = MakeLevelInfo(info);
const Extent2D tile_size = DefaultBlockSize(info.format);
@@ -534,8 +534,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
tile_size.height, info.tile_width_spacing);
const size_t subresource_size = sizes[level];
- const auto dst_data = std::make_unique<u8[]>(subresource_size);
- const std::span<u8> dst(dst_data.get(), subresource_size);
+ tmp_buffer.resize_destructive(subresource_size);
+ const std::span<u8> dst(tmp_buffer);
for (s32 layer = 0; layer < info.resources.layers; ++layer) {
const std::span<const u8> src = input.subspan(host_offset);
@@ -765,8 +765,9 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
}
std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
- const ImageInfo& info, std::span<u8> output) {
- const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
+ const ImageInfo& info, std::span<const u8> input,
+ std::span<u8> output) {
+ const size_t guest_size_bytes = input.size_bytes();
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
const Extent3D size = info.size;
@@ -789,10 +790,6 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP
.image_extent = size,
}};
}
- const auto input_data = std::make_unique<u8[]>(guest_size_bytes);
- gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes);
- const std::span<const u8> input(input_data.get(), guest_size_bytes);
-
const LevelInfo level_info = MakeLevelInfo(info);
const s32 num_layers = info.resources.layers;
const s32 num_levels = info.resources.levels;
@@ -980,13 +977,14 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
}
void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
- std::span<const BufferImageCopy> copies, std::span<const u8> memory) {
+ std::span<const BufferImageCopy> copies, std::span<const u8> memory,
+ Common::ScratchBuffer<u8>& tmp_buffer) {
const bool is_pitch_linear = info.type == ImageType::Linear;
for (const BufferImageCopy& copy : copies) {
if (is_pitch_linear) {
SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory);
} else {
- SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory);
+ SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory, tmp_buffer);
}
}
}
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 5e28f4ab3..d103db8ae 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -7,6 +7,7 @@
#include <span>
#include "common/common_types.h"
+#include "common/scratch_buffer.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/image_base.h"
@@ -59,6 +60,7 @@ struct OverlapResult {
[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
GPUVAddr gpu_addr, const ImageInfo& info,
+ std::span<const u8> input,
std::span<u8> output);
[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
@@ -76,7 +78,8 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info);
void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
- std::span<const BufferImageCopy> copies, std::span<const u8> memory);
+ std::span<const BufferImageCopy> copies, std::span<const u8> memory,
+ Common::ScratchBuffer<u8>& tmp_buffer);
[[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info,
const ImageInfo& overlap_info, u32 new_level,
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 6a2ad4b1d..c4d31681a 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -421,7 +421,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
VkPhysicalDevice8BitStorageFeatures bit8_storage{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES,
.pNext = nullptr,
- .storageBuffer8BitAccess = false,
+ .storageBuffer8BitAccess = true,
.uniformAndStorageBuffer8BitAccess = true,
.storagePushConstant8 = false,
};
@@ -660,6 +660,16 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
+ VkPhysicalDeviceDepthClipControlFeaturesEXT depth_clip_control_features;
+ if (ext_depth_clip_control) {
+ depth_clip_control_features = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT,
+ .pNext = nullptr,
+ .depthClipControl = VK_TRUE,
+ };
+ SetNext(next, depth_clip_control_features);
+ }
+
VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv;
if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) {
nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>();
@@ -1044,6 +1054,7 @@ void Device::CheckSuitability(bool requires_swapchain) const {
std::make_pair(bit16_storage.storageBuffer16BitAccess, "storageBuffer16BitAccess"),
std::make_pair(bit16_storage.uniformAndStorageBuffer16BitAccess,
"uniformAndStorageBuffer16BitAccess"),
+ std::make_pair(bit8_storage.storageBuffer8BitAccess, "storageBuffer8BitAccess"),
std::make_pair(bit8_storage.uniformAndStorageBuffer8BitAccess,
"uniformAndStorageBuffer8BitAccess"),
std::make_pair(host_query_reset.hostQueryReset, "hostQueryReset"),
@@ -1083,6 +1094,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
bool has_ext_vertex_input_dynamic_state{};
bool has_ext_line_rasterization{};
bool has_ext_primitive_topology_list_restart{};
+ bool has_ext_depth_clip_control{};
for (const std::string& extension : supported_extensions) {
const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
bool push) {
@@ -1116,6 +1128,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);
test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME,
true);
+ test(has_ext_depth_clip_control, VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME, false);
test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
@@ -1279,6 +1292,19 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
ext_line_rasterization = true;
}
}
+ if (has_ext_depth_clip_control) {
+ VkPhysicalDeviceDepthClipControlFeaturesEXT depth_clip_control_features;
+ depth_clip_control_features.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT;
+ depth_clip_control_features.pNext = nullptr;
+ features.pNext = &depth_clip_control_features;
+ physical.GetFeatures2(features);
+
+ if (depth_clip_control_features.depthClipControl) {
+ extensions.push_back(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
+ ext_depth_clip_control = true;
+ }
+ }
if (has_khr_workgroup_memory_explicit_layout) {
VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout;
layout.sType =
@@ -1380,6 +1406,10 @@ void Device::SetupFeatures() {
is_shader_storage_image_multisample = features.shaderStorageImageMultisample;
is_blit_depth_stencil_supported = TestDepthStencilBlits();
is_optimal_astc_supported = IsOptimalAstcSupported(features);
+
+ const VkPhysicalDeviceLimits& limits{properties.limits};
+ max_vertex_input_attributes = limits.maxVertexInputAttributes;
+ max_vertex_input_bindings = limits.maxVertexInputBindings;
}
void Device::SetupProperties() {
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index db802437c..6a26c4e6e 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -256,6 +256,11 @@ public:
return ext_depth_range_unrestricted;
}
+ /// Returns true if the device supports VK_EXT_depth_clip_control.
+ bool IsExtDepthClipControlSupported() const {
+ return ext_depth_clip_control;
+ }
+
/// Returns true if the device supports VK_EXT_shader_viewport_index_layer.
bool IsExtShaderViewportIndexLayerSupported() const {
return ext_shader_viewport_index_layer;
@@ -368,6 +373,14 @@ public:
return must_emulate_bgr565;
}
+ u32 GetMaxVertexInputAttributes() const {
+ return max_vertex_input_attributes;
+ }
+
+ u32 GetMaxVertexInputBindings() const {
+ return max_vertex_input_bindings;
+ }
+
private:
/// Checks if the physical device is suitable.
void CheckSuitability(bool requires_swapchain) const;
@@ -446,6 +459,7 @@ private:
bool khr_swapchain_mutable_format{}; ///< Support for VK_KHR_swapchain_mutable_format.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
+ bool ext_depth_clip_control{}; ///< Support for VK_EXT_depth_clip_control
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
@@ -467,6 +481,8 @@ private:
bool supports_d24_depth{}; ///< Supports D24 depth buffers.
bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
+ u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline
+ u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 483b534a0..7dca7341c 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -314,6 +314,18 @@ const char* ToString(VkResult result) noexcept {
return "VK_ERROR_VALIDATION_FAILED_EXT";
case VkResult::VK_ERROR_INVALID_SHADER_NV:
return "VK_ERROR_INVALID_SHADER_NV";
+ case VkResult::VK_ERROR_IMAGE_USAGE_NOT_SUPPORTED_KHR:
+ return "VK_ERROR_IMAGE_USAGE_NOT_SUPPORTED_KHR";
+ case VkResult::VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR:
+ return "VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR";
+ case VkResult::VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR:
+ return "VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR";
+ case VkResult::VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR:
+ return "VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR";
+ case VkResult::VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR:
+ return "VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR";
+ case VkResult::VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR:
+ return "VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR";
case VkResult::VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT:
return "VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT";
case VkResult::VK_ERROR_FRAGMENTATION_EXT: