diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 5 | ||||
-rw-r--r-- | src/video_core/engines/fermi_2d.cpp | 4 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_graphics_pipeline.h | 1 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 24 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_query_cache.cpp | 13 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 6 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_device.h | 11 |
9 files changed, 58 insertions, 14 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 081a574e8..f5b10411b 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1192,11 +1192,6 @@ void BufferCache<P>::UpdateDrawIndirect() { .size = static_cast<u32>(size), .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)), }; - VAddr cpu_addr_start = Common::AlignDown(*cpu_addr, 64); - VAddr cpu_addr_end = Common::AlignUp(*cpu_addr + size, 64); - IntervalType interval{cpu_addr_start, cpu_addr_end}; - ClearDownload(interval); - common_ranges.subtract(interval); }; if (current_draw_indirect->include_count) { update(current_draw_indirect->count_start_address, sizeof(u32), diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 02e161270..91f10aec2 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -72,7 +72,7 @@ void Fermi2D::Blit() { UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); const auto& args = regs.pixels_from_memory; - constexpr s64 null_derivate = 1ULL << 32; + constexpr s64 null_derivative = 1ULL << 32; Surface src = regs.src; const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 && @@ -89,7 +89,7 @@ void Fermi2D::Blit() { .operation = regs.operation, .filter = args.sample_mode.filter, .must_accelerate = - args.du_dx != null_derivate || args.dv_dy != null_derivate || delegate_to_gpu, + args.du_dx != null_derivative || args.dv_dy != null_derivative || delegate_to_gpu, .dst_x0 = args.dst_x0, .dst_y0 = args.dst_y0, .dst_x1 = args.dst_x0 + args.dst_width, diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 32d767d85..592c28ba3 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -268,7 +268,7 @@ size_t Maxwell3D::EstimateIndexBufferSize() { std::numeric_limits<u32>::max()}; const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); const size_t log2_byte_size = Common::Log2Ceil64(byte_size); - const size_t cap{GetMaxCurrentVertices() * 3 * byte_size}; + const size_t cap{GetMaxCurrentVertices() * 4 * byte_size}; const size_t lower_cap = std::min<size_t>(static_cast<size_t>(end_address - start_address), cap); return std::min<size_t>( diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 44a771d65..af0a453ee 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -559,7 +559,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { - glTransformFeedbackAttribsNV(num_xfb_attribs, xfb_attribs.data(), GL_SEPARATE_ATTRIBS); + const GLenum buffer_mode = + num_xfb_buffers_active == 1 ? GL_INTERLEAVED_ATTRIBS : GL_SEPARATE_ATTRIBS; + glTransformFeedbackAttribsNV(num_xfb_attribs, xfb_attribs.data(), buffer_mode); } void GraphicsPipeline::GenerateTransformFeedbackState() { @@ -567,12 +569,14 @@ void GraphicsPipeline::GenerateTransformFeedbackState() { // when this is required. GLint* cursor{xfb_attribs.data()}; + num_xfb_buffers_active = 0; for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { const auto& layout = key.xfb_state.layouts[feedback]; UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); if (layout.varying_count == 0) { continue; } + num_xfb_buffers_active++; const auto& locations = key.xfb_state.varyings[feedback]; std::optional<u32> current_index; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 74fc9cc3d..2f70c1ae9 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -154,6 +154,7 @@ private: static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; + u32 num_xfb_buffers_active{}; std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{}; std::mutex built_mutex; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 22bf8cc77..89b455bff 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -263,6 +263,22 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program info.y_negate = key.state.y_negate != 0; return info; } + +size_t GetTotalPipelineWorkers() { + const size_t max_core_threads = + std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL; +#ifdef ANDROID + // Leave at least a few cores free in android + constexpr size_t free_cores = 3ULL; + if (max_core_threads <= free_cores) { + return 1ULL; + } + return max_core_threads - free_cores; +#else + return max_core_threads; +#endif +} + } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -294,11 +310,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device texture_cache{texture_cache_}, shader_notify{shader_notify_}, use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()}, -#ifdef ANDROID - workers(1, "VkPipelineBuilder"), -#else - workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), -#endif + workers(device.HasBrokenParallelShaderCompiling() ? 1ULL : GetTotalPipelineWorkers(), + "VkPipelineBuilder"), serialization_thread(1, "VkPipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverId driver_id{device.GetDriverID()}; @@ -338,6 +351,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .support_native_ndc = device.IsExtDepthClipControlSupported(), .support_scaled_attributes = !device.MustEmulateScaledFormats(), + .support_multi_viewport = device.SupportsMultiViewport(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 66c03bf17..078777cdd 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -211,6 +211,13 @@ public: return; } PauseCounter(); + const auto driver_id = device.GetDriverID(); + if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || + driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) { + pending_sync.clear(); + sync_values_stash.clear(); + return; + } sync_values_stash.clear(); sync_values_stash.emplace_back(); std::vector<HostSyncValues>* sync_values = &sync_values_stash.back(); @@ -1378,6 +1385,12 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku return true; } + auto driver_id = impl->device.GetDriverID(); + if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || + driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) { + return true; + } + for (size_t i = 0; i < 2; i++) { is_null[i] = !is_in_ac[i] && check_value(objects[i]->address); } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index e518756d2..6900b8ffa 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -635,6 +635,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR has_broken_cube_compatibility = true; } } + if (is_qualcomm) { + const u32 version = (properties.properties.driverVersion << 3) >> 3; + if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) { + has_broken_parallel_compiling = true; + } + } if (extensions.sampler_filter_minmax && is_amd) { // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken. if (!features.shader_float16_int8.shaderFloat16) { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index b213ed7dd..4f3846345 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -102,6 +102,7 @@ VK_DEFINE_HANDLE(VmaAllocator) EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_4444_FORMATS_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \ @@ -599,6 +600,11 @@ public: return has_broken_cube_compatibility; } + /// Returns true if parallel shader compiling has issues with the current driver. + bool HasBrokenParallelShaderCompiling() const { + return has_broken_parallel_compiling; + } + /// Returns the vendor name reported from Vulkan. std::string_view GetVendorName() const { return properties.driver.driverName; @@ -663,6 +669,10 @@ public: return supports_conditional_barriers; } + bool SupportsMultiViewport() const { + return features2.features.multiViewport; + } + [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id, u32 driver_version) { if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { @@ -794,6 +804,7 @@ private: bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. bool has_broken_compute{}; ///< Compute shaders can cause crashes bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit + bool has_broken_parallel_compiling{}; ///< Has broken parallel shader compiling. bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached bool supports_d24_depth{}; ///< Supports D24 depth buffers. |