diff options
Diffstat (limited to 'src/video_core')
29 files changed, 166 insertions, 97 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 627917ab6..06fd40851 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1938,21 +1938,14 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s bool is_written) const { const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8); - const u32 alignment = runtime.GetStorageBufferAlignment(); - - const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); - const u32 aligned_size = - Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment); - - const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); + const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (!cpu_addr || size == 0) { return NULL_BINDING; } - - const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE); + const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); const Binding binding{ .cpu_addr = *cpu_addr, - .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr), + .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), .buffer_id = BufferId{}, }; return binding; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index ae9da6290..614d61db4 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -186,6 +186,7 @@ bool Maxwell3D::IsMethodExecutable(u32 method) { case MAXWELL3D_REG_INDEX(launch_dma): case MAXWELL3D_REG_INDEX(inline_data): case MAXWELL3D_REG_INDEX(fragment_barrier): + case MAXWELL3D_REG_INDEX(invalidate_texture_data_cache): case MAXWELL3D_REG_INDEX(tiled_cache_barrier): return true; default: @@ -258,7 +259,7 @@ u32 Maxwell3D::GetMaxCurrentVertices() { size_t Maxwell3D::EstimateIndexBufferSize() { GPUVAddr start_address = regs.index_buffer.StartAddress(); GPUVAddr end_address = regs.index_buffer.EndAddress(); - constexpr std::array<size_t, 4> max_sizes = { + static constexpr std::array<size_t, 4> max_sizes = { std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(), std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); @@ -375,6 +376,9 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume return; case MAXWELL3D_REG_INDEX(fragment_barrier): return rasterizer->FragmentBarrier(); + case MAXWELL3D_REG_INDEX(invalidate_texture_data_cache): + rasterizer->InvalidateGPUCache(); + return rasterizer->WaitForIdle(); case MAXWELL3D_REG_INDEX(tiled_cache_barrier): return rasterizer->TiledCacheBarrier(); default: diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index c6d54be63..7024a19cf 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -99,7 +99,7 @@ struct GPU::Impl { /// Signal the ending of command list. void OnCommandListEnd() { - gpu_thread.OnCommandListEnd(); + rasterizer->ReleaseFences(); } /// Request a host GPU memory flush from the CPU. diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 164a5252a..9c103c0d4 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -40,8 +40,6 @@ static void RunThread(std::stop_token stop_token, Core::System& system, scheduler.Push(submit_list->channel, std::move(submit_list->entries)); } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) { renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); - } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { - rasterizer->ReleaseFences(); } else if (std::holds_alternative<GPUTickCommand>(next.data)) { system.GPU().TickWork(); } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { @@ -110,10 +108,6 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { rasterizer->OnCPUWrite(addr, size); } -void ThreadManager::OnCommandListEnd() { - PushCommand(OnCommandListEndCommand()); -} - u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { if (!is_async) { // In synchronous GPU mode, block the caller until the command has executed diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index c71a419c7..90bcb5958 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -77,16 +77,12 @@ struct FlushAndInvalidateRegionCommand final { u64 size; }; -/// Command called within the gpu, to schedule actions after a command list end -struct OnCommandListEndCommand final {}; - /// Command to make the gpu look into pending requests struct GPUTickCommand final {}; using CommandData = std::variant<std::monostate, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, - InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand, - GPUTickCommand>; + InvalidateRegionCommand, FlushAndInvalidateRegionCommand, GPUTickCommand>; struct CommandDataContainer { CommandDataContainer() = default; @@ -134,8 +130,6 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed and invalidated void FlushAndInvalidateRegion(VAddr addr, u64 size); - void OnCommandListEnd(); - void TickGPU(); private: diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp index 42e7d6e4f..3e9022dce 100644 --- a/src/video_core/host1x/codecs/codec.cpp +++ b/src/video_core/host1x/codecs/codec.cpp @@ -152,6 +152,8 @@ bool Codec::CreateGpuAvDevice() { void Codec::InitializeAvCodecContext() { av_codec_ctx = avcodec_alloc_context3(av_codec); av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); + av_codec_ctx->thread_count = 0; + av_codec_ctx->thread_type &= ~FF_THREAD_FRAME; } void Codec::InitializeGpuDecoder() { diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index 36a04e4e0..10d7ef884 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -189,9 +189,7 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { for (std::size_t y = 0; y < frame_height; ++y) { const std::size_t src = y * stride; const std::size_t dst = y * aligned_width; - for (std::size_t x = 0; x < frame_width; ++x) { - luma_buffer[dst + x] = luma_src[src + x]; - } + std::memcpy(luma_buffer.data() + dst, luma_src + src, frame_width); } host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), luma_buffer.size()); @@ -205,15 +203,15 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { // Frame from FFmpeg software // Populate chroma buffer from both channels with interleaving. const std::size_t half_width = frame_width / 2; + u8* chroma_buffer_data = chroma_buffer.data(); const u8* chroma_b_src = frame->data[1]; const u8* chroma_r_src = frame->data[2]; for (std::size_t y = 0; y < half_height; ++y) { const std::size_t src = y * half_stride; const std::size_t dst = y * aligned_width; - for (std::size_t x = 0; x < half_width; ++x) { - chroma_buffer[dst + x * 2] = chroma_b_src[src + x]; - chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x]; + chroma_buffer_data[dst + x * 2] = chroma_b_src[src + x]; + chroma_buffer_data[dst + x * 2 + 1] = chroma_r_src[src + x]; } } break; @@ -225,9 +223,7 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { for (std::size_t y = 0; y < half_height; ++y) { const std::size_t src = y * stride; const std::size_t dst = y * aligned_width; - for (std::size_t x = 0; x < frame_width; ++x) { - chroma_buffer[dst + x] = chroma_src[src + x]; - } + std::memcpy(chroma_buffer.data() + dst, chroma_src + src, frame_width); } break; } diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 52cd5bb81..2442c3c29 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -22,6 +22,8 @@ set(SHADER_FILES convert_d24s8_to_abgr8.frag convert_depth_to_float.frag convert_float_to_depth.frag + convert_msaa_to_non_msaa.comp + convert_non_msaa_to_msaa.comp convert_s8d24_to_abgr8.frag full_screen_triangle.vert fxaa.frag diff --git a/src/video_core/host_shaders/convert_msaa_to_non_msaa.comp b/src/video_core/host_shaders/convert_msaa_to_non_msaa.comp new file mode 100644 index 000000000..fc3854d18 --- /dev/null +++ b/src/video_core/host_shaders/convert_msaa_to_non_msaa.comp @@ -0,0 +1,30 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 core +layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout (binding = 0, rgba8) uniform readonly restrict image2DMSArray msaa_in; +layout (binding = 1, rgba8) uniform writeonly restrict image2DArray output_img; + +void main() { + const ivec3 coords = ivec3(gl_GlobalInvocationID); + if (any(greaterThanEqual(coords, imageSize(msaa_in)))) { + return; + } + + // TODO: Specialization constants for num_samples? + const int num_samples = imageSamples(msaa_in); + for (int curr_sample = 0; curr_sample < num_samples; ++curr_sample) { + const vec4 pixel = imageLoad(msaa_in, coords, curr_sample); + + const int single_sample_x = 2 * coords.x + (curr_sample & 1); + const int single_sample_y = 2 * coords.y + ((curr_sample / 2) & 1); + const ivec3 dest_coords = ivec3(single_sample_x, single_sample_y, coords.z); + + if (any(greaterThanEqual(dest_coords, imageSize(output_img)))) { + continue; + } + imageStore(output_img, dest_coords, pixel); + } +} diff --git a/src/video_core/host_shaders/convert_non_msaa_to_msaa.comp b/src/video_core/host_shaders/convert_non_msaa_to_msaa.comp new file mode 100644 index 000000000..dedd962f1 --- /dev/null +++ b/src/video_core/host_shaders/convert_non_msaa_to_msaa.comp @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 core +layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout (binding = 0, rgba8) uniform readonly restrict image2DArray img_in; +layout (binding = 1, rgba8) uniform writeonly restrict image2DMSArray output_msaa; + +void main() { + const ivec3 coords = ivec3(gl_GlobalInvocationID); + if (any(greaterThanEqual(coords, imageSize(output_msaa)))) { + return; + } + + // TODO: Specialization constants for num_samples? + const int num_samples = imageSamples(output_msaa); + for (int curr_sample = 0; curr_sample < num_samples; ++curr_sample) { + const int single_sample_x = 2 * coords.x + (curr_sample & 1); + const int single_sample_y = 2 * coords.y + ((curr_sample / 2) & 1); + const ivec3 single_coords = ivec3(single_sample_x, single_sample_y, coords.z); + + if (any(greaterThanEqual(single_coords, imageSize(img_in)))) { + continue; + } + const vec4 pixel = imageLoad(img_in, single_coords); + imageStore(output_msaa, coords, curr_sample, pixel); + } +} diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 9ebfb6179..cf56392ef 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -216,7 +216,7 @@ private: std::vector<u64> big_page_continous; std::vector<std::pair<VAddr, std::size_t>> page_stash{}; - constexpr static size_t continous_bits = 64; + static constexpr size_t continous_bits = 64; const size_t unique_identifier; std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index bb1962073..a8c3f8b67 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -160,10 +160,6 @@ public: return device.CanReportMemoryUsage(); } - u32 GetStorageBufferAlignment() const { - return static_cast<u32>(device.GetShaderStorageBufferAlignment()); - } - private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 626ea7dcb..479bb8ba3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -236,7 +236,6 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .needs_demote_reorder = device.IsAmd(), .support_snorm_render_buffer = false, .support_viewport_index_layer = device.HasVertexViewportLayer(), - .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()), .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), } { if (use_asynchronous_shaders) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 9f7ce7414..eb6e43a08 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -557,6 +557,14 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, } } +void TextureCacheRuntime::CopyImageMSAA(Image& dst_image, Image& src_image, + std::span<const VideoCommon::ImageCopy> copies) { + LOG_DEBUG(Render_OpenGL, "Copying from {} samples to {} samples", src_image.info.num_samples, + dst_image.info.num_samples); + // TODO: Leverage the format conversion pass if possible/accurate. + util_shaders.CopyMSAA(dst_image, src_image, copies); +} + void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies) { LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 5d9d370f2..e30875496 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -93,12 +93,19 @@ public: return device.CanReportMemoryUsage(); } - bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { + bool ShouldReinterpret([[maybe_unused]] Image& dst, + [[maybe_unused]] Image& src) const noexcept { + return true; + } + + bool CanUploadMSAA() const noexcept { return true; } void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 404def62e..2c7ac210b 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -12,6 +12,8 @@ #include "video_core/host_shaders/astc_decoder_comp.h" #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" +#include "video_core/host_shaders/convert_msaa_to_non_msaa_comp.h" +#include "video_core/host_shaders/convert_non_msaa_to_msaa_comp.h" #include "video_core/host_shaders/opengl_convert_s8d24_comp.h" #include "video_core/host_shaders/opengl_copy_bc4_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" @@ -51,7 +53,9 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)), - convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)) { + convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)), + convert_ms_to_nonms_program(MakeProgram(CONVERT_MSAA_TO_NON_MSAA_COMP)), + convert_nonms_to_ms_program(MakeProgram(CONVERT_NON_MSAA_TO_MSAA_COMP)) { const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); swizzle_table_buffer.Create(); glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); @@ -269,6 +273,33 @@ void UtilShaders::ConvertS8D24(Image& dst_image, std::span<const ImageCopy> copi program_manager.RestoreGuestCompute(); } +void UtilShaders::CopyMSAA(Image& dst_image, Image& src_image, + std::span<const VideoCommon::ImageCopy> copies) { + const bool is_ms_to_non_ms = src_image.info.num_samples > 1 && dst_image.info.num_samples == 1; + const auto program_handle = + is_ms_to_non_ms ? convert_ms_to_nonms_program.handle : convert_nonms_to_ms_program.handle; + program_manager.BindComputeProgram(program_handle); + + for (const ImageCopy& copy : copies) { + ASSERT(copy.src_subresource.base_layer == 0); + ASSERT(copy.src_subresource.num_layers == 1); + ASSERT(copy.dst_subresource.base_layer == 0); + ASSERT(copy.dst_subresource.num_layers == 1); + + glBindImageTexture(0, src_image.StorageHandle(), copy.src_subresource.base_level, GL_TRUE, + 0, GL_READ_ONLY, GL_RGBA8); + glBindImageTexture(1, dst_image.StorageHandle(), copy.dst_subresource.base_level, GL_TRUE, + 0, GL_WRITE_ONLY, GL_RGBA8); + + const u32 num_dispatches_x = Common::DivCeil(copy.extent.width, 8U); + const u32 num_dispatches_y = Common::DivCeil(copy.extent.height, 8U); + const u32 num_dispatches_z = copy.extent.depth; + + glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); + } + program_manager.RestoreGuestCompute(); +} + GLenum StoreFormat(u32 bytes_per_block) { switch (bytes_per_block) { case 1: diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 44efb6ecf..9013808e7 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -40,6 +40,9 @@ public: void ConvertS8D24(Image& dst_image, std::span<const VideoCommon::ImageCopy> copies); + void CopyMSAA(Image& dst_image, Image& src_image, + std::span<const VideoCommon::ImageCopy> copies); + private: ProgramManager& program_manager; @@ -51,6 +54,8 @@ private: OGLProgram pitch_unswizzle_program; OGLProgram copy_bc4_program; OGLProgram convert_s8d24_program; + OGLProgram convert_ms_to_nonms_program; + OGLProgram convert_nonms_to_ms_program; }; GLenum StoreFormat(u32 bytes_per_block); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1cfb4c2ff..b0153a502 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -330,10 +330,6 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const { return device.CanReportMemoryUsage(); } -u32 BufferCacheRuntime::GetStorageBufferAlignment() const { - return static_cast<u32>(device.GetStorageBufferAlignment()); -} - void BufferCacheRuntime::Finish() { scheduler.Finish(); } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 06539c733..183b33632 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -73,8 +73,6 @@ public: bool CanReportMemoryUsage() const; - u32 GetStorageBufferAlignment() const; - [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f91bb5a1d..baedc4424 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -548,31 +548,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { static_vector<VkVertexInputBindingDescription, 32> vertex_bindings; static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors; static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes; - if (key.state.dynamic_vertex_input) { - const size_t num_vertex_arrays = std::min( - key.state.attributes.size(), static_cast<size_t>(device.GetMaxVertexInputBindings())); - for (size_t index = 0; index < num_vertex_arrays; ++index) { - const u32 type = key.state.DynamicAttributeType(index); - if (!stage_infos[0].loads.Generic(index) || type == 0) { - continue; - } - vertex_attributes.push_back({ - .location = static_cast<u32>(index), - .binding = 0, - .format = type == 1 ? VK_FORMAT_R32_SFLOAT - : type == 2 ? VK_FORMAT_R32_SINT - : VK_FORMAT_R32_UINT, - .offset = 0, - }); - } - if (!vertex_attributes.empty()) { - vertex_bindings.push_back({ - .binding = 0, - .stride = 4, - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, - }); - } - } else { + if (!key.state.dynamic_vertex_input) { const size_t num_vertex_arrays = std::min( Maxwell::NumVertexArrays, static_cast<size_t>(device.GetMaxVertexInputBindings())); for (size_t index = 0; index < num_vertex_arrays; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7e69b11d8..0684cceed 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -344,7 +344,6 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE, .support_snorm_render_buffer = true, .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), - .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()), .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), }; diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp index 8eb735489..f8735189d 100644 --- a/src/video_core/renderer_vulkan/vk_smaa.cpp +++ b/src/video_core/renderer_vulkan/vk_smaa.cpp @@ -468,7 +468,7 @@ VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo> } void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image) { - constexpr std::array<VkImageSubresourceRange, 1> subresources{{{ + static constexpr std::array<VkImageSubresourceRange, 1> subresources{{{ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, .levelCount = 1, @@ -528,8 +528,8 @@ SMAA::SMAA(const Device& device, MemoryAllocator& allocator, size_t image_count, } void SMAA::CreateImages() { - constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; - constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; + static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; + static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; std::tie(m_static_images[Area], m_static_buffer_commits[Area]) = CreateWrappedImage(m_device, m_allocator, area_extent, VK_FORMAT_R8G8_UNORM); @@ -586,12 +586,12 @@ void SMAA::CreateSampler() { void SMAA::CreateShaders() { // These match the order of the SMAAStage enum - constexpr std::array vert_shader_sources{ + static constexpr std::array vert_shader_sources{ ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_VERT_SPV), ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_VERT_SPV), ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_VERT_SPV), }; - constexpr std::array frag_shader_sources{ + static constexpr std::array frag_shader_sources{ ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_FRAG_SPV), ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_FRAG_SPV), ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_FRAG_SPV), @@ -675,8 +675,8 @@ void SMAA::UploadImages(Scheduler& scheduler) { return; } - constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; - constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; + static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; + static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; UploadImage(m_device, m_allocator, scheduler, m_static_images[Area], area_extent, VK_FORMAT_R8G8_UNORM, ARRAY_TO_SPAN(areaTexBytes)); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index d39372ec4..9b85dfb5e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1230,6 +1230,11 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, }); } +void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src, + std::span<const VideoCommon::ImageCopy> copies) { + UNIMPLEMENTED_MSG("Copying images with different samples is not implemented in Vulkan."); +} + u64 TextureCacheRuntime::GetDeviceLocalMemory() const { return device.GetDeviceLocalMemory(); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 1f27a3589..0ce39616f 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -70,6 +70,8 @@ public: void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + bool ShouldReinterpret(Image& dst, Image& src); void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); @@ -80,6 +82,11 @@ public: return false; } + bool CanUploadMSAA() const noexcept { + // TODO: Implement buffer to MSAA uploads + return false; + } + void AccelerateImageUpload(Image&, const StagingBufferRef&, std::span<const VideoCommon::SwizzleParameters>); @@ -106,7 +113,7 @@ public: std::optional<ASTCDecoderPass> astc_decoder_pass; const Settings::ResolutionScalingInfo& resolution; - constexpr static size_t indexing_slots = 8 * sizeof(size_t); + static constexpr size_t indexing_slots = 8 * sizeof(size_t); std::array<vk::Buffer, indexing_slots> buffers{}; std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{}; }; diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp index c42594149..db04943eb 100644 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp @@ -48,7 +48,7 @@ void TurboMode::Run(std::stop_token stop_token) { auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal); // Create the descriptor pool to contain our descriptor. - constexpr VkDescriptorPoolSize pool_size{ + static constexpr VkDescriptorPoolSize pool_size{ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, }; @@ -63,7 +63,7 @@ void TurboMode::Run(std::stop_token stop_token) { }); // Create the descriptor set layout from the pool. - constexpr VkDescriptorSetLayoutBinding layout_binding{ + static constexpr VkDescriptorSetLayoutBinding layout_binding{ .binding = 0, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index 418890126..30f72361d 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp @@ -22,6 +22,9 @@ std::string Name(const ImageBase& image) { const u32 num_layers = image.info.resources.layers; const u32 num_levels = image.info.resources.levels; std::string resource; + if (image.info.num_samples > 1) { + resource += fmt::format(":{}xMSAA", image.info.num_samples); + } if (num_layers > 1) { resource += fmt::format(":L{}", num_layers); } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1b01990a4..3e2cbb0b0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -773,7 +773,7 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { image.flags &= ~ImageFlagBits::CpuModified; TrackImage(image, image_id); - if (image.info.num_samples > 1) { + if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) { LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); return; } @@ -1167,14 +1167,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA if (True(overlap.flags & ImageFlagBits::GpuModified)) { new_image.flags |= ImageFlagBits::GpuModified; } + const auto& resolution = Settings::values.resolution_info; + const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); + const u32 up_scale = can_rescale ? resolution.up_scale : 1; + const u32 down_shift = can_rescale ? resolution.down_shift : 0; + auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift); if (overlap.info.num_samples != new_image.info.num_samples) { - LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); + runtime.CopyImageMSAA(new_image, overlap, std::move(copies)); } else { - const auto& resolution = Settings::values.resolution_info; - const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); - const u32 up_scale = can_rescale ? resolution.up_scale : 1; - const u32 down_shift = can_rescale ? resolution.down_shift : 0; - auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift); runtime.CopyImage(new_image, overlap, std::move(copies)); } if (True(overlap.flags & ImageFlagBits::Tracked)) { diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 03acc68d9..697f86641 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -573,10 +573,6 @@ u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept { if (info.type == ImageType::Buffer) { return info.size.width * BytesPerBlock(info.format); } - if (info.num_samples > 1) { - // Multisample images can't be uploaded or downloaded to the host - return 0; - } if (info.type == ImageType::Linear) { return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); } @@ -703,7 +699,6 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale, u32 down_shift) { ASSERT(dst.resources.levels >= src.resources.levels); - ASSERT(dst.num_samples == src.num_samples); const bool is_dst_3d = dst.type == ImageType::e3D; if (is_dst_3d) { diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 59120cd09..95bcdd37b 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -29,7 +29,7 @@ constexpr u32 pdep(u32 value) { template <u32 mask, u32 incr_amount> void incrpdep(u32& value) { - constexpr u32 swizzled_incr = pdep<mask>(incr_amount); + static constexpr u32 swizzled_incr = pdep<mask>(incr_amount); value = ((value | ~mask) + swizzled_incr) & mask; } |