diff options
Diffstat (limited to '')
32 files changed, 316 insertions, 174 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 158360830..f1c60d1f3 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -20,6 +20,7 @@ #include "common/lru_cache.h" #include "common/microprofile.h" #include "common/polyfill_ranges.h" +#include "common/scratch_buffer.h" #include "common/settings.h" #include "core/memory.h" #include "video_core/buffer_cache/buffer_base.h" @@ -422,8 +423,7 @@ private: IntervalSet common_ranges; std::deque<IntervalSet> committed_ranges; - size_t immediate_buffer_capacity = 0; - std::unique_ptr<u8[]> immediate_buffer_alloc; + Common::ScratchBuffer<u8> immediate_buffer_alloc; struct LRUItemParams { using ObjectType = BufferId; @@ -1927,11 +1927,8 @@ std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size template <class P> std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) { - if (wanted_capacity > immediate_buffer_capacity) { - immediate_buffer_capacity = wanted_capacity; - immediate_buffer_alloc = std::make_unique<u8[]>(wanted_capacity); - } - return std::span<u8>(immediate_buffer_alloc.get(), wanted_capacity); + immediate_buffer_alloc.resize_destructive(wanted_capacity); + return std::span<u8>(immediate_buffer_alloc.data(), wanted_capacity); } template <class P> diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 9835e3ac1..322de2606 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -56,7 +56,7 @@ bool DmaPusher::Step() { if (command_list.prefetch_command_list.size()) { // Prefetched command list from nvdrv, used for things like synchronization - command_headers = std::move(command_list.prefetch_command_list); + ProcessCommands(command_list.prefetch_command_list); dma_pushbuffer.pop(); } else { const CommandListHeader command_list_header{ @@ -74,7 +74,7 @@ bool DmaPusher::Step() { } // Push buffer non-empty, read a word - command_headers.resize(command_list_header.size); + command_headers.resize_destructive(command_list_header.size); if (Settings::IsGPULevelHigh()) { memory_manager.ReadBlock(dma_get, command_headers.data(), command_list_header.size * sizeof(u32)); @@ -82,16 +82,21 @@ bool DmaPusher::Step() { memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(), command_list_header.size * sizeof(u32)); } + ProcessCommands(command_headers); } - for (std::size_t index = 0; index < command_headers.size();) { - const CommandHeader& command_header = command_headers[index]; + + return true; +} + +void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) { + for (std::size_t index = 0; index < commands.size();) { + const CommandHeader& command_header = commands[index]; if (dma_state.method_count) { // Data word of methods command if (dma_state.non_incrementing) { const u32 max_write = static_cast<u32>( - std::min<std::size_t>(index + dma_state.method_count, command_headers.size()) - - index); + std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index); CallMultiMethod(&command_header.argument, max_write); dma_state.method_count -= max_write; dma_state.is_last_call = true; @@ -142,8 +147,6 @@ bool DmaPusher::Step() { } index++; } - - return true; } void DmaPusher::SetState(const CommandHeader& command_header) { diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 938f0f11c..6f00de937 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -4,11 +4,13 @@ #pragma once #include <array> +#include <span> #include <vector> #include <queue> #include "common/bit_field.h" #include "common/common_types.h" +#include "common/scratch_buffer.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/puller.h" @@ -136,13 +138,15 @@ private: static constexpr u32 non_puller_methods = 0x40; static constexpr u32 max_subchannels = 8; bool Step(); + void ProcessCommands(std::span<const CommandHeader> commands); void SetState(const CommandHeader& command_header); void CallMethod(u32 argument) const; void CallMultiMethod(const u32* base_start, u32 num_methods) const; - std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once + Common::ScratchBuffer<CommandHeader> + command_headers; ///< Buffer for list of commands fetched at once std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer @@ -159,7 +163,7 @@ private: DmaState dma_state{}; bool dma_increment_once{}; - bool ib_enable{true}; ///< IB mode enabled + const bool ib_enable{true}; ///< IB mode enabled std::array<Engines::EngineInterface*, max_subchannels> subchannels{}; diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index b213c374f..3a78421f6 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -46,21 +46,26 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) { SetInlineIndexBuffer(regs.inline_index_4x8.index2); SetInlineIndexBuffer(regs.inline_index_4x8.index3); break; - case MAXWELL3D_REG_INDEX(topology_override): - use_topology_override = true; + case MAXWELL3D_REG_INDEX(vertex_array_instance_first): + case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): { + LOG_WARNING(HW_GPU, "(STUBBED) called"); break; + } default: break; } } void DrawManager::Clear(u32 layer_count) { - maxwell3d->rasterizer->Clear(layer_count); + if (maxwell3d->ShouldExecute()) { + maxwell3d->rasterizer->Clear(layer_count); + } } void DrawManager::DrawDeferred() { - if (draw_state.draw_mode != DrawMode::Instance || draw_state.instance_count == 0) + if (draw_state.draw_mode != DrawMode::Instance || draw_state.instance_count == 0) { return; + } DrawEnd(draw_state.instance_count + 1, true); draw_state.instance_count = 0; } @@ -115,8 +120,9 @@ void DrawManager::DrawEnd(u32 instance_count, bool force_draw) { const auto& regs{maxwell3d->regs}; switch (draw_state.draw_mode) { case DrawMode::Instance: - if (!force_draw) + if (!force_draw) { break; + } [[fallthrough]]; case DrawMode::General: draw_state.base_instance = regs.global_base_instance_index; @@ -156,25 +162,28 @@ void DrawManager::DrawIndexSmall(u32 argument) { ProcessDraw(true, 1); } -void DrawManager::ProcessTopologyOverride() { - if (!use_topology_override) - return; - +void DrawManager::UpdateTopology() { const auto& regs{maxwell3d->regs}; - switch (regs.topology_override) { - case PrimitiveTopologyOverride::None: - break; - case PrimitiveTopologyOverride::Points: - draw_state.topology = PrimitiveTopology::Points; - break; - case PrimitiveTopologyOverride::Lines: - draw_state.topology = PrimitiveTopology::Lines; - break; - case PrimitiveTopologyOverride::LineStrip: - draw_state.topology = PrimitiveTopology::LineStrip; + switch (regs.primitive_topology_control) { + case PrimitiveTopologyControl::UseInBeginMethods: break; - default: - draw_state.topology = static_cast<PrimitiveTopology>(regs.topology_override); + case PrimitiveTopologyControl::UseSeparateState: + switch (regs.topology_override) { + case PrimitiveTopologyOverride::None: + break; + case PrimitiveTopologyOverride::Points: + draw_state.topology = PrimitiveTopology::Points; + break; + case PrimitiveTopologyOverride::Lines: + draw_state.topology = PrimitiveTopology::Lines; + break; + case PrimitiveTopologyOverride::LineStrip: + draw_state.topology = PrimitiveTopology::LineStrip; + break; + default: + draw_state.topology = static_cast<PrimitiveTopology>(regs.topology_override); + break; + } break; } } @@ -183,9 +192,10 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) { LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology, draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count); - ProcessTopologyOverride(); + UpdateTopology(); - if (maxwell3d->ShouldExecute()) + if (maxwell3d->ShouldExecute()) { maxwell3d->rasterizer->Draw(draw_indexed, instance_count); + } } } // namespace Tegra::Engines diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 4f67027ca..0e6930a9c 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -10,6 +10,7 @@ class RasterizerInterface; } namespace Tegra::Engines { +using PrimitiveTopologyControl = Maxwell3D::Regs::PrimitiveTopologyControl; using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology; using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride; using IndexBuffer = Maxwell3D::Regs::IndexBuffer; @@ -58,12 +59,11 @@ private: void DrawIndexSmall(u32 argument); - void ProcessTopologyOverride(); + void UpdateTopology(); void ProcessDraw(bool draw_indexed, u32 instance_count); Maxwell3D* maxwell3d{}; State draw_state{}; - bool use_topology_override{}; }; } // namespace Tegra::Engines diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index e4f8331ab..cea1dd8b0 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -24,7 +24,7 @@ void State::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { void State::ProcessExec(const bool is_linear_) { write_offset = 0; copy_size = regs.line_length_in * regs.line_count; - inner_buffer.resize(copy_size); + inner_buffer.resize_destructive(copy_size); is_linear = is_linear_; } @@ -70,7 +70,7 @@ void State::ProcessData(std::span<const u8> read_buffer) { const std::size_t dst_size = Tegra::Texture::CalculateSize( true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, regs.dest.BlockHeight(), regs.dest.BlockDepth()); - tmp_buffer.resize(dst_size); + tmp_buffer.resize_destructive(dst_size); memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index 94fafd9dc..7242d2529 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -4,9 +4,10 @@ #pragma once #include <span> -#include <vector> + #include "common/bit_field.h" #include "common/common_types.h" +#include "common/scratch_buffer.h" namespace Tegra { class MemoryManager; @@ -73,8 +74,8 @@ private: u32 write_offset = 0; u32 copy_size = 0; - std::vector<u8> inner_buffer; - std::vector<u8> tmp_buffer; + Common::ScratchBuffer<u8> inner_buffer; + Common::ScratchBuffer<u8> tmp_buffer; bool is_linear = false; Registers& regs; MemoryManager& memory_manager; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index a189e60ae..f73d7bf0f 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -184,12 +184,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { const size_t src_size = CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); - if (read_buffer.size() < src_size) { - read_buffer.resize(src_size); - } - if (write_buffer.size() < dst_size) { - write_buffer.resize(dst_size); - } + read_buffer.resize_destructive(src_size); + write_buffer.resize_destructive(dst_size); memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); @@ -235,12 +231,8 @@ void MaxwellDMA::CopyPitchToBlockLinear() { CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; - if (read_buffer.size() < src_size) { - read_buffer.resize(src_size); - } - if (write_buffer.size() < dst_size) { - write_buffer.resize(dst_size); - } + read_buffer.resize_destructive(src_size); + write_buffer.resize_destructive(dst_size); memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); if (Settings::IsGPULevelExtreme()) { @@ -269,12 +261,8 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { pos_x = pos_x % x_in_gob; pos_y = pos_y % 8; - if (read_buffer.size() < src_size) { - read_buffer.resize(src_size); - } - if (write_buffer.size() < dst_size) { - write_buffer.resize(dst_size); - } + read_buffer.resize_destructive(src_size); + write_buffer.resize_destructive(dst_size); if (Settings::IsGPULevelExtreme()) { memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size); @@ -333,14 +321,10 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { const u32 pitch = x_elements * bytes_per_pixel; const size_t mid_buffer_size = pitch * regs.line_count; - if (read_buffer.size() < src_size) { - read_buffer.resize(src_size); - } - if (write_buffer.size() < dst_size) { - write_buffer.resize(dst_size); - } + read_buffer.resize_destructive(src_size); + write_buffer.resize_destructive(dst_size); - intermediate_buffer.resize(mid_buffer_size); + intermediate_buffer.resize_destructive(mid_buffer_size); memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index d40d3d302..c88191a61 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -6,8 +6,10 @@ #include <array> #include <cstddef> #include <vector> + #include "common/bit_field.h" #include "common/common_types.h" +#include "common/scratch_buffer.h" #include "video_core/engines/engine_interface.h" namespace Core { @@ -234,9 +236,9 @@ private: MemoryManager& memory_manager; VideoCore::RasterizerInterface* rasterizer = nullptr; - std::vector<u8> read_buffer; - std::vector<u8> write_buffer; - std::vector<u8> intermediate_buffer; + Common::ScratchBuffer<u8> read_buffer; + Common::ScratchBuffer<u8> write_buffer; + Common::ScratchBuffer<u8> intermediate_buffer; static constexpr std::size_t NUM_REGS = 0x800; struct Regs { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 28b38273e..c6d54be63 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -223,8 +223,6 @@ struct GPU::Impl { /// core timing events. void Start() { gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler); - cpu_context = renderer->GetRenderWindow().CreateSharedContext(); - cpu_context->MakeCurrent(); } void NotifyShutdown() { @@ -235,6 +233,9 @@ struct GPU::Impl { /// Obtain the CPU Context void ObtainContext() { + if (!cpu_context) { + cpu_context = renderer->GetRenderWindow().CreateSharedContext(); + } cpu_context->MakeCurrent(); } diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index ac0b7d20e..36a04e4e0 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -155,7 +155,7 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) { // swizzle pitch linear to block linear const u32 block_height = static_cast<u32>(config.block_linear_height_log2); const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0); - luma_buffer.resize(size); + luma_buffer.resize_destructive(size); std::span<const u8> frame_buff(converted_frame_buf_addr, 4 * width * height); Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, 0, 0, width, height, block_height, 0, width * 4); @@ -181,8 +181,8 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { const auto stride = static_cast<size_t>(frame->linesize[0]); - luma_buffer.resize(aligned_width * surface_height); - chroma_buffer.resize(aligned_width * surface_height / 2); + luma_buffer.resize_destructive(aligned_width * surface_height); + chroma_buffer.resize_destructive(aligned_width * surface_height / 2); // Populate luma buffer const u8* luma_src = frame->data[0]; diff --git a/src/video_core/host1x/vic.h b/src/video_core/host1x/vic.h index 2b78786e8..3d9753047 100644 --- a/src/video_core/host1x/vic.h +++ b/src/video_core/host1x/vic.h @@ -4,8 +4,9 @@ #pragma once #include <memory> -#include <vector> + #include "common/common_types.h" +#include "common/scratch_buffer.h" struct SwsContext; @@ -49,8 +50,8 @@ private: /// size does not change during a stream using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>; AVMallocPtr converted_frame_buffer; - std::vector<u8> luma_buffer; - std::vector<u8> chroma_buffer; + Common::ScratchBuffer<u8> luma_buffer; + Common::ScratchBuffer<u8> chroma_buffer; GPUVAddr config_struct_address{}; GPUVAddr output_surface_luma_address{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index e2e3dac34..cee5c3247 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -112,7 +112,7 @@ bool IsASTCSupported() { } } // Anonymous namespace -Device::Device() { +Device::Device(Core::Frontend::EmuWindow& emu_window) { if (!GLAD_GL_VERSION_4_6) { LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available"); throw std::runtime_error{"Insufficient version"}; @@ -126,9 +126,9 @@ Device::Device() { const bool is_intel = vendor_name == "Intel"; #ifdef __unix__ - const bool is_linux = true; + constexpr bool is_linux = true; #else - const bool is_linux = false; + constexpr bool is_linux = false; #endif bool disable_fast_buffer_sub_data = false; @@ -193,9 +193,11 @@ Device::Device() { } } + strict_context_required = emu_window.StrictContextRequired(); // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. + // Blocks EGL on Wayland from using asynchronous shader compilation. use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && - !(is_amd || (is_intel && !is_linux)); + !(is_amd || (is_intel && !is_linux)) && !strict_context_required; use_driver_cache = is_nvidia; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 5ef51ebcf..2a72d84be 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -5,6 +5,7 @@ #include <cstddef> #include "common/common_types.h" +#include "core/frontend/emu_window.h" #include "shader_recompiler/stage.h" namespace Settings { @@ -15,7 +16,7 @@ namespace OpenGL { class Device { public: - explicit Device(); + explicit Device(Core::Frontend::EmuWindow& emu_window); [[nodiscard]] std::string GetVendorName() const; @@ -173,6 +174,10 @@ public: return can_report_memory; } + bool StrictContextRequired() const { + return strict_context_required; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -216,6 +221,7 @@ private: bool has_cbuf_ftou_bug{}; bool has_bool_ref_bug{}; bool can_report_memory{}; + bool strict_context_required{}; std::string vendor_name; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 64ed6f628..a44b8c454 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -138,9 +138,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_load void RasterizerOpenGL::Clear(u32 layer_count) { MICROPROFILE_SCOPE(OpenGL_Clears); - if (!maxwell3d->ShouldExecute()) { - return; - } const auto& regs = maxwell3d->regs; bool use_color{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index a59d0d24e..f8868a012 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -174,6 +174,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_}, shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()}, + strict_context_required{device.StrictContextRequired()}, profile{ .supported_spirv = 0x00010000, @@ -203,6 +204,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), + .support_native_ndc = true, .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), @@ -255,9 +257,14 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, } shader_cache_filename = base_dir / "opengl.bin"; - if (!workers) { + if (!workers && !strict_context_required) { workers = CreateWorkers(); } + std::optional<Context> strict_context; + if (strict_context_required) { + strict_context.emplace(emu_window); + } + struct { std::mutex mutex; size_t total{}; @@ -265,44 +272,49 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, bool has_loaded{}; } state; + const auto queue_work{[&](Common::UniqueFunction<void, Context*>&& work) { + if (strict_context_required) { + work(&strict_context.value()); + } else { + workers->QueueWork(std::move(work)); + } + }}; const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { ComputePipelineKey key; file.read(reinterpret_cast<char*>(&key), sizeof(key)); - workers->QueueWork( - [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { - ctx->pools.ReleaseContents(); - auto pipeline{CreateComputePipeline(ctx->pools, key, env)}; - std::scoped_lock lock{state.mutex}; - if (pipeline) { - compute_cache.emplace(key, std::move(pipeline)); - } - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } - }); + queue_work([this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { + ctx->pools.ReleaseContents(); + auto pipeline{CreateComputePipeline(ctx->pools, key, env)}; + std::scoped_lock lock{state.mutex}; + if (pipeline) { + compute_cache.emplace(key, std::move(pipeline)); + } + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); ++state.total; }}; const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) { GraphicsPipelineKey key; file.read(reinterpret_cast<char*>(&key), sizeof(key)); - workers->QueueWork( - [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable { - boost::container::static_vector<Shader::Environment*, 5> env_ptrs; - for (auto& env : envs) { - env_ptrs.push_back(&env); - } - ctx->pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; - std::scoped_lock lock{state.mutex}; - if (pipeline) { - graphics_cache.emplace(key, std::move(pipeline)); - } - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } - }); + queue_work([this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable { + boost::container::static_vector<Shader::Environment*, 5> env_ptrs; + for (auto& env : envs) { + env_ptrs.push_back(&env); + } + ctx->pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; + std::scoped_lock lock{state.mutex}; + if (pipeline) { + graphics_cache.emplace(key, std::move(pipeline)); + } + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); ++state.total; }}; LoadPipelines(stop_loading, shader_cache_filename, CACHE_VERSION, load_compute, load_graphics); @@ -314,6 +326,9 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, state.has_loaded = true; lock.unlock(); + if (strict_context_required) { + return; + } workers->WaitForRequests(stop_loading); if (!use_asynchronous_shaders) { workers.reset(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 53ffea904..f82420592 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -69,6 +69,7 @@ private: StateTracker& state_tracker; VideoCore::ShaderNotify& shader_notify; const bool use_asynchronous_shaders; + const bool strict_context_required; GraphicsPipelineKey graphics_key{}; GraphicsPipeline* current_pipeline{}; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 5b5e178ad..bc75680f0 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -140,8 +140,8 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr<Core::Frontend::GraphicsContext> context_) : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, - emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{}, - program_manager{device}, + emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, device{emu_window_}, + state_tracker{}, program_manager{device}, rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 18be54729..f502a7d09 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -139,23 +139,25 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { RenderScreenshot(*framebuffer, use_accelerated); bool has_been_recreated = false; - const auto recreate_swapchain = [&] { + const auto recreate_swapchain = [&](u32 width, u32 height) { if (!has_been_recreated) { has_been_recreated = true; scheduler.Finish(); } - const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); - swapchain.Create(layout.width, layout.height, is_srgb); + swapchain.Create(width, height, is_srgb); }; - if (swapchain.NeedsRecreation(is_srgb)) { - recreate_swapchain(); + + const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); + if (swapchain.NeedsRecreation(is_srgb) || swapchain.GetWidth() != layout.width || + swapchain.GetHeight() != layout.height) { + recreate_swapchain(layout.width, layout.height); } bool is_outdated; do { swapchain.AcquireNextImage(); is_outdated = swapchain.IsOutDated(); if (is_outdated) { - recreate_swapchain(); + recreate_swapchain(layout.width, layout.height); } } while (is_outdated); if (has_been_recreated) { diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 54a12b35f..6b54d7111 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -461,6 +461,9 @@ void BufferCacheRuntime::BindQuadIndexBuffer(PrimitiveTopology topology, u32 fir void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride) { + if (index >= device.GetMaxVertexInputBindings()) { + return; + } if (device.IsExtExtendedDynamicStateSupported()) { scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) { const VkDeviceSize vk_offset = buffer != VK_NULL_HANDLE ? offset : 0; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 006128638..515d8d869 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -529,7 +529,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors; static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes; if (key.state.dynamic_vertex_input) { - for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const size_t num_vertex_arrays = std::min( + key.state.attributes.size(), static_cast<size_t>(device.GetMaxVertexInputBindings())); + for (size_t index = 0; index < num_vertex_arrays; ++index) { const u32 type = key.state.DynamicAttributeType(index); if (!stage_infos[0].loads.Generic(index) || type == 0) { continue; @@ -551,7 +553,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { }); } } else { - for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const size_t num_vertex_arrays = std::min( + Maxwell::NumVertexArrays, static_cast<size_t>(device.GetMaxVertexInputBindings())); + for (size_t index = 0; index < num_vertex_arrays; ++index) { const bool instanced = key.state.binding_divisors[index] != 0; const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; @@ -580,6 +584,8 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { }); } } + ASSERT(vertex_attributes.size() <= device.GetMaxVertexInputAttributes()); + VkPipelineVertexInputStateCreateInfo vertex_input_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .pNext = nullptr, @@ -634,23 +640,33 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { }; std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); - const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, .flags = 0, .viewportCount = Maxwell::NumViewports, .pViewportSwizzles = swizzles.data(), }; - const VkPipelineViewportStateCreateInfo viewport_ci{ + VkPipelineViewportDepthClipControlCreateInfoEXT ndc_info{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT, + .pNext = nullptr, + .negativeOneToOne = key.state.ndc_minus_one_to_one.Value() != 0 ? VK_TRUE : VK_FALSE, + }; + VkPipelineViewportStateCreateInfo viewport_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = device.IsNvViewportSwizzleSupported() ? &swizzle_ci : nullptr, + .pNext = nullptr, .flags = 0, .viewportCount = Maxwell::NumViewports, .pViewports = nullptr, .scissorCount = Maxwell::NumViewports, .pScissors = nullptr, }; - + if (device.IsNvViewportSwizzleSupported()) { + swizzle_ci.pNext = std::exchange(viewport_ci.pNext, &swizzle_ci); + } + if (device.IsExtDepthClipControlSupported()) { + ndc_info.pNext = std::exchange(viewport_ci.pNext, &ndc_info); + } VkPipelineRasterizationStateCreateInfo rasterization_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 81f5f3e11..e7262420c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -321,6 +321,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .support_derivative_control = true, .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), + .support_native_ndc = device.IsExtDepthClipControlSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), @@ -341,6 +342,15 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .support_snorm_render_buffer = true, .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), }; + + if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) { + LOG_WARNING(Render_Vulkan, "maxVertexInputAttributes is too low: {} < {}", + device.GetMaxVertexInputAttributes(), Maxwell::NumVertexAttributes); + } + if (device.GetMaxVertexInputBindings() < Maxwell::NumVertexArrays) { + LOG_WARNING(Render_Vulkan, "maxVertexInputBindings is too low: {} < {}", + device.GetMaxVertexInputBindings(), Maxwell::NumVertexArrays); + } } PipelineCache::~PipelineCache() = default; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 3774f303a..ac1eb9895 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -220,9 +220,6 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { void RasterizerVulkan::Clear(u32 layer_count) { MICROPROFILE_SCOPE(Vulkan_Clearing); - if (!maxwell3d->ShouldExecute()) { - return; - } FlushWork(); query_cache.UpdateCounters(); @@ -665,8 +662,7 @@ void RasterizerVulkan::BeginTransformFeedback() { return; } UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || - regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation) || - regs.IsShaderConfigEnabled(Maxwell::ShaderType::Geometry)); + regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation)); scheduler.Record( [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index d7be417f5..b6810eef9 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -67,17 +67,19 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi } // Anonymous namespace -Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_, u32 width, - u32 height, bool srgb) +Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_, + u32 width_, u32 height_, bool srgb) : surface{surface_}, device{device_}, scheduler{scheduler_} { - Create(width, height, srgb); + Create(width_, height_, srgb); } Swapchain::~Swapchain() = default; -void Swapchain::Create(u32 width, u32 height, bool srgb) { +void Swapchain::Create(u32 width_, u32 height_, bool srgb) { is_outdated = false; is_suboptimal = false; + width = width_; + height = height_; const auto physical_device = device.GetPhysical(); const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; @@ -88,7 +90,7 @@ void Swapchain::Create(u32 width, u32 height, bool srgb) { device.GetLogical().WaitIdle(); Destroy(); - CreateSwapchain(capabilities, width, height, srgb); + CreateSwapchain(capabilities, srgb); CreateSemaphores(); CreateImageViews(); @@ -148,8 +150,7 @@ void Swapchain::Present(VkSemaphore render_semaphore) { } } -void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, - bool srgb) { +void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb) { const auto physical_device{device.GetPhysical()}; const auto formats{physical_device.GetSurfaceFormatsKHR(surface)}; const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)}; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 111b3902d..caf1ff32b 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -80,9 +80,16 @@ public: return *present_semaphores[frame_index]; } + u32 GetWidth() const { + return width; + } + + u32 GetHeight() const { + return height; + } + private: - void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, - bool srgb); + void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb); void CreateSemaphores(); void CreateImageViews(); @@ -105,6 +112,9 @@ private: std::vector<u64> resource_ticks; std::vector<vk::Semaphore> present_semaphores; + u32 width; + u32 height; + u32 image_index{}; u32 frame_index{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8e68a2e53..27c82cd20 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -39,6 +39,12 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); sampler_descriptor.cubemap_anisotropy.Assign(1); + // These values were chosen based on typical peak swizzle data sizes seen in some titles + static constexpr size_t SWIZZLE_DATA_BUFFER_INITIAL_CAPACITY = 8_MiB; + static constexpr size_t UNSWIZZLE_DATA_BUFFER_INITIAL_CAPACITY = 1_MiB; + swizzle_data_buffer.resize_destructive(SWIZZLE_DATA_BUFFER_INITIAL_CAPACITY); + unswizzle_data_buffer.resize_destructive(UNSWIZZLE_DATA_BUFFER_INITIAL_CAPACITY); + // Make sure the first index is reserved for the null resources // This way the null resource becomes a compile time constant void(slot_images.insert(NullImageParams{})); @@ -90,7 +96,8 @@ void TextureCache<P>::RunGarbageCollector() { const auto copies = FullDownloadCopies(image.info); image.DownloadMemory(map, copies); runtime.Finish(); - SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span, + swizzle_data_buffer); } if (True(image.flags & ImageFlagBits::Tracked)) { UntrackImage(image, image_id); @@ -461,7 +468,8 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { const auto copies = FullDownloadCopies(image.info); image.DownloadMemory(map, copies); runtime.Finish(); - SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span, + swizzle_data_buffer); } } @@ -672,7 +680,8 @@ void TextureCache<P>::PopAsyncFlushes() { for (const ImageId image_id : download_ids) { const ImageBase& image = slot_images[image_id]; const auto copies = FullDownloadCopies(image.info); - SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, + swizzle_data_buffer); download_map.offset += image.unswizzled_size_bytes; download_span = download_span.subspan(image.unswizzled_size_bytes); } @@ -734,13 +743,21 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); const auto uploads = FullUploadSwizzles(image.info); runtime.AccelerateImageUpload(image, staging, uploads); - } else if (True(image.flags & ImageFlagBits::Converted)) { - std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); - auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data); - ConvertImage(unswizzled_data, image.info, mapped_span, copies); + return; + } + const size_t guest_size_bytes = image.guest_size_bytes; + swizzle_data_buffer.resize_destructive(guest_size_bytes); + gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); + + if (True(image.flags & ImageFlagBits::Converted)) { + unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); + auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, + unswizzle_data_buffer); + ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies); image.UploadMemory(staging, copies); } else { - const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span); + const auto copies = + UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span); image.UploadMemory(staging, copies); } } @@ -910,7 +927,7 @@ void TextureCache<P>::InvalidateScale(Image& image) { } template <class P> -u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) { +u64 TextureCache<P>::GetScaledImageSizeBytes(const ImageBase& image) { const u64 scale_up = static_cast<u64>(Settings::values.resolution_info.up_scale * Settings::values.resolution_info.up_scale); const u64 down_shift = static_cast<u64>(Settings::values.resolution_info.down_shift + diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 587339a31..4fd677a80 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -17,6 +17,7 @@ #include "common/literals.h" #include "common/lru_cache.h" #include "common/polyfill_ranges.h" +#include "common/scratch_buffer.h" #include "video_core/compatible_formats.h" #include "video_core/control/channel_state_cache.h" #include "video_core/delayed_destruction_ring.h" @@ -368,7 +369,7 @@ private: void InvalidateScale(Image& image); bool ScaleUp(Image& image); bool ScaleDown(Image& image); - u64 GetScaledImageSizeBytes(ImageBase& image); + u64 GetScaledImageSizeBytes(const ImageBase& image); Runtime& runtime; @@ -417,6 +418,9 @@ private: std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table; + Common::ScratchBuffer<u8> swizzle_data_buffer; + Common::ScratchBuffer<u8> unswizzle_data_buffer; + u64 modification_tick = 0; u64 frame_tick = 0; }; diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index e8c908b42..03acc68d9 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -505,7 +505,7 @@ void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, const BufferImageCopy& copy, - std::span<const u8> input) { + std::span<const u8> input, Common::ScratchBuffer<u8>& tmp_buffer) { const Extent3D size = info.size; const LevelInfo level_info = MakeLevelInfo(info); const Extent2D tile_size = DefaultBlockSize(info.format); @@ -534,8 +534,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr tile_size.height, info.tile_width_spacing); const size_t subresource_size = sizes[level]; - const auto dst_data = std::make_unique<u8[]>(subresource_size); - const std::span<u8> dst(dst_data.get(), subresource_size); + tmp_buffer.resize_destructive(subresource_size); + const std::span<u8> dst(tmp_buffer); for (s32 layer = 0; layer < info.resources.layers; ++layer) { const std::span<const u8> src = input.subspan(host_offset); @@ -765,8 +765,9 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config } std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, - const ImageInfo& info, std::span<u8> output) { - const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); + const ImageInfo& info, std::span<const u8> input, + std::span<u8> output) { + const size_t guest_size_bytes = input.size_bytes(); const u32 bpp_log2 = BytesPerBlockLog2(info.format); const Extent3D size = info.size; @@ -789,10 +790,6 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP .image_extent = size, }}; } - const auto input_data = std::make_unique<u8[]>(guest_size_bytes); - gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes); - const std::span<const u8> input(input_data.get(), guest_size_bytes); - const LevelInfo level_info = MakeLevelInfo(info); const s32 num_layers = info.resources.layers; const s32 num_levels = info.resources.levels; @@ -980,13 +977,14 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { } void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, - std::span<const BufferImageCopy> copies, std::span<const u8> memory) { + std::span<const BufferImageCopy> copies, std::span<const u8> memory, + Common::ScratchBuffer<u8>& tmp_buffer) { const bool is_pitch_linear = info.type == ImageType::Linear; for (const BufferImageCopy& copy : copies) { if (is_pitch_linear) { SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory); } else { - SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory); + SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory, tmp_buffer); } } } diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 5e28f4ab3..d103db8ae 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -7,6 +7,7 @@ #include <span> #include "common/common_types.h" +#include "common/scratch_buffer.h" #include "video_core/surface.h" #include "video_core/texture_cache/image_base.h" @@ -59,6 +60,7 @@ struct OverlapResult { [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, + std::span<const u8> input, std::span<u8> output); [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, @@ -76,7 +78,8 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 [[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, - std::span<const BufferImageCopy> copies, std::span<const u8> memory); + std::span<const BufferImageCopy> copies, std::span<const u8> memory, + Common::ScratchBuffer<u8>& tmp_buffer); [[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info, const ImageInfo& overlap_info, u32 new_level, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 6a2ad4b1d..c4d31681a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -421,7 +421,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR VkPhysicalDevice8BitStorageFeatures bit8_storage{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES, .pNext = nullptr, - .storageBuffer8BitAccess = false, + .storageBuffer8BitAccess = true, .uniformAndStorageBuffer8BitAccess = true, .storagePushConstant8 = false, }; @@ -660,6 +660,16 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } + VkPhysicalDeviceDepthClipControlFeaturesEXT depth_clip_control_features; + if (ext_depth_clip_control) { + depth_clip_control_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT, + .pNext = nullptr, + .depthClipControl = VK_TRUE, + }; + SetNext(next, depth_clip_control_features); + } + VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) { nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>(); @@ -1044,6 +1054,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(bit16_storage.storageBuffer16BitAccess, "storageBuffer16BitAccess"), std::make_pair(bit16_storage.uniformAndStorageBuffer16BitAccess, "uniformAndStorageBuffer16BitAccess"), + std::make_pair(bit8_storage.storageBuffer8BitAccess, "storageBuffer8BitAccess"), std::make_pair(bit8_storage.uniformAndStorageBuffer8BitAccess, "uniformAndStorageBuffer8BitAccess"), std::make_pair(host_query_reset.hostQueryReset, "hostQueryReset"), @@ -1083,6 +1094,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { bool has_ext_vertex_input_dynamic_state{}; bool has_ext_line_rasterization{}; bool has_ext_primitive_topology_list_restart{}; + bool has_ext_depth_clip_control{}; for (const std::string& extension : supported_extensions) { const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, bool push) { @@ -1116,6 +1128,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME, true); + test(has_ext_depth_clip_control, VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME, false); test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); @@ -1279,6 +1292,19 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { ext_line_rasterization = true; } } + if (has_ext_depth_clip_control) { + VkPhysicalDeviceDepthClipControlFeaturesEXT depth_clip_control_features; + depth_clip_control_features.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT; + depth_clip_control_features.pNext = nullptr; + features.pNext = &depth_clip_control_features; + physical.GetFeatures2(features); + + if (depth_clip_control_features.depthClipControl) { + extensions.push_back(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); + ext_depth_clip_control = true; + } + } if (has_khr_workgroup_memory_explicit_layout) { VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; layout.sType = @@ -1380,6 +1406,10 @@ void Device::SetupFeatures() { is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); is_optimal_astc_supported = IsOptimalAstcSupported(features); + + const VkPhysicalDeviceLimits& limits{properties.limits}; + max_vertex_input_attributes = limits.maxVertexInputAttributes; + max_vertex_input_bindings = limits.maxVertexInputBindings; } void Device::SetupProperties() { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index db802437c..6a26c4e6e 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -256,6 +256,11 @@ public: return ext_depth_range_unrestricted; } + /// Returns true if the device supports VK_EXT_depth_clip_control. + bool IsExtDepthClipControlSupported() const { + return ext_depth_clip_control; + } + /// Returns true if the device supports VK_EXT_shader_viewport_index_layer. bool IsExtShaderViewportIndexLayerSupported() const { return ext_shader_viewport_index_layer; @@ -368,6 +373,14 @@ public: return must_emulate_bgr565; } + u32 GetMaxVertexInputAttributes() const { + return max_vertex_input_attributes; + } + + u32 GetMaxVertexInputBindings() const { + return max_vertex_input_bindings; + } + private: /// Checks if the physical device is suitable. void CheckSuitability(bool requires_swapchain) const; @@ -446,6 +459,7 @@ private: bool khr_swapchain_mutable_format{}; ///< Support for VK_KHR_swapchain_mutable_format. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. + bool ext_depth_clip_control{}; ///< Support for VK_EXT_depth_clip_control bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. @@ -467,6 +481,8 @@ private: bool supports_d24_depth{}; ///< Supports D24 depth buffers. bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. + u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline + u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline // Telemetry parameters std::string vendor_name; ///< Device's driver name. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 483b534a0..7dca7341c 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -314,6 +314,18 @@ const char* ToString(VkResult result) noexcept { return "VK_ERROR_VALIDATION_FAILED_EXT"; case VkResult::VK_ERROR_INVALID_SHADER_NV: return "VK_ERROR_INVALID_SHADER_NV"; + case VkResult::VK_ERROR_IMAGE_USAGE_NOT_SUPPORTED_KHR: + return "VK_ERROR_IMAGE_USAGE_NOT_SUPPORTED_KHR"; + case VkResult::VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR: + return "VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR"; + case VkResult::VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR: + return "VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR"; + case VkResult::VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR: + return "VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR"; + case VkResult::VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR: + return "VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR"; + case VkResult::VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR: + return "VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR"; case VkResult::VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT: return "VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT"; case VkResult::VK_ERROR_FRAGMENTATION_EXT: |