summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_vulkan
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp119
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.h76
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp63
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp29
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp421
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h24
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp56
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.h25
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.cpp102
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.h119
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h6
14 files changed, 843 insertions, 210 deletions
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index e62b36822..3d328a250 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -48,43 +48,30 @@ void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell&
}
} // Anonymous namespace
-void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
- bool has_extended_dynamic_state, bool has_dynamic_vertex_input) {
+void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFeatures& features) {
const Maxwell& regs = maxwell3d.regs;
const auto topology_ = maxwell3d.draw_manager->GetDrawState().topology;
- const std::array enabled_lut{
- regs.polygon_offset_point_enable,
- regs.polygon_offset_line_enable,
- regs.polygon_offset_fill_enable,
- };
- const u32 topology_index = static_cast<u32>(topology_);
raw1 = 0;
- extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0);
- dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0);
+ extended_dynamic_state.Assign(features.has_extended_dynamic_state ? 1 : 0);
+ extended_dynamic_state_2.Assign(features.has_extended_dynamic_state_2 ? 1 : 0);
+ extended_dynamic_state_2_extra.Assign(features.has_extended_dynamic_state_2_extra ? 1 : 0);
+ extended_dynamic_state_3_blend.Assign(features.has_extended_dynamic_state_3_blend ? 1 : 0);
+ extended_dynamic_state_3_enables.Assign(features.has_extended_dynamic_state_3_enables ? 1 : 0);
+ dynamic_vertex_input.Assign(features.has_dynamic_vertex_input ? 1 : 0);
xfb_enabled.Assign(regs.transform_feedback_enabled != 0);
- primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
- depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
- depth_clamp_disabled.Assign(regs.viewport_clip_control.geometry_clip ==
- Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
- regs.viewport_clip_control.geometry_clip ==
- Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ ||
- regs.viewport_clip_control.geometry_clip ==
- Maxwell::ViewportClipControl::GeometryClip::FrustumZ);
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));
- patch_control_points_minus_one.Assign(regs.patch_vertices - 1);
tessellation_primitive.Assign(static_cast<u32>(regs.tessellation.params.domain_type.Value()));
tessellation_spacing.Assign(static_cast<u32>(regs.tessellation.params.spacing.Value()));
tessellation_clockwise.Assign(regs.tessellation.params.output_primitives.Value() ==
Maxwell::Tessellation::OutputPrimitives::Triangles_CW);
- logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
- logic_op.Assign(PackLogicOp(regs.logic_op.op));
+ patch_control_points_minus_one.Assign(regs.patch_vertices - 1);
topology.Assign(topology_);
msaa_mode.Assign(regs.anti_alias_samples_mode);
raw2 = 0;
- rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
+
const auto test_func =
regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always_GL;
alpha_test_func.Assign(PackComparisonOp(test_func));
@@ -97,6 +84,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0);
alpha_to_coverage_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_coverage != 0 ? 1 : 0);
alpha_to_one_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_one != 0 ? 1 : 0);
+ app_stage.Assign(maxwell3d.engine_state);
for (size_t i = 0; i < regs.rt.size(); ++i) {
color_formats[i] = static_cast<u8>(regs.rt[i].format);
@@ -105,7 +93,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
point_size = Common::BitCast<u32>(regs.point_size);
if (maxwell3d.dirty.flags[Dirty::VertexInput]) {
- if (has_dynamic_vertex_input) {
+ if (features.has_dynamic_vertex_input) {
// Dirty flag will be reset by the command buffer update
static constexpr std::array LUT{
0u, // Invalid
@@ -144,12 +132,6 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
}
}
}
- if (maxwell3d.dirty.flags[Dirty::Blending]) {
- maxwell3d.dirty.flags[Dirty::Blending] = false;
- for (size_t index = 0; index < attachments.size(); ++index) {
- attachments[index].Refresh(regs, index);
- }
- }
if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) {
maxwell3d.dirty.flags[Dirty::ViewportSwizzles] = false;
const auto& transform = regs.viewport_transform;
@@ -157,8 +139,27 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
return static_cast<u16>(viewport.swizzle.raw);
});
}
+ dynamic_state.raw1 = 0;
+ dynamic_state.raw2 = 0;
if (!extended_dynamic_state) {
dynamic_state.Refresh(regs);
+ std::ranges::transform(regs.vertex_streams, vertex_strides.begin(), [](const auto& array) {
+ return static_cast<u16>(array.stride.Value());
+ });
+ }
+ if (!extended_dynamic_state_2_extra) {
+ dynamic_state.Refresh2(regs, topology, extended_dynamic_state_2);
+ }
+ if (!extended_dynamic_state_3_blend) {
+ if (maxwell3d.dirty.flags[Dirty::Blending]) {
+ maxwell3d.dirty.flags[Dirty::Blending] = false;
+ for (size_t index = 0; index < attachments.size(); ++index) {
+ attachments[index].Refresh(regs, index);
+ }
+ }
+ }
+ if (!extended_dynamic_state_3_enables) {
+ dynamic_state.Refresh3(regs);
}
if (xfb_enabled) {
RefreshXfbState(xfb_state, regs);
@@ -175,12 +176,11 @@ void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t
mask_a.Assign(mask.A);
// TODO: C++20 Use templated lambda to deduplicate code
+ if (!regs.blend.enable[index]) {
+ return;
+ }
- if (!regs.blend_per_target_enabled) {
- if (!regs.blend.enable[index]) {
- return;
- }
- const auto& src = regs.blend;
+ const auto setup_blend = [&]<typename T>(const T& src) {
equation_rgb.Assign(PackBlendEquation(src.color_op));
equation_a.Assign(PackBlendEquation(src.alpha_op));
factor_source_rgb.Assign(PackBlendFactor(src.color_source));
@@ -188,20 +188,13 @@ void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t
factor_source_a.Assign(PackBlendFactor(src.alpha_source));
factor_dest_a.Assign(PackBlendFactor(src.alpha_dest));
enable.Assign(1);
- return;
- }
+ };
- if (!regs.blend.enable[index]) {
+ if (!regs.blend_per_target_enabled) {
+ setup_blend(regs.blend);
return;
}
- const auto& src = regs.blend_per_target[index];
- equation_rgb.Assign(PackBlendEquation(src.color_op));
- equation_a.Assign(PackBlendEquation(src.alpha_op));
- factor_source_rgb.Assign(PackBlendFactor(src.color_source));
- factor_dest_rgb.Assign(PackBlendFactor(src.color_dest));
- factor_source_a.Assign(PackBlendFactor(src.alpha_source));
- factor_dest_a.Assign(PackBlendFactor(src.alpha_dest));
- enable.Assign(1);
+ setup_blend(regs.blend_per_target[index]);
}
void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
@@ -211,8 +204,6 @@ void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
packed_front_face = 1 - packed_front_face;
}
- raw1 = 0;
- raw2 = 0;
front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op.fail));
front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op.zfail));
front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op.zpass));
@@ -236,9 +227,37 @@ void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
cull_face.Assign(PackCullFace(regs.gl_cull_face));
cull_enable.Assign(regs.gl_cull_test_enabled != 0 ? 1 : 0);
- std::ranges::transform(regs.vertex_streams, vertex_strides.begin(), [](const auto& array) {
- return static_cast<u16>(array.stride.Value());
- });
+}
+
+/// Packs the logic op and, when the base feature set is unavailable, the rasterizer-discard,
+/// primitive-restart and depth-bias enables into this DynamicState.
+/// @param regs Maxwell register state to read from.
+/// @param topology_ Primitive topology used to pick the matching polygon offset enable.
+/// @param base_feautures_supported When true only the logic op is packed.
+void FixedPipelineState::DynamicState::Refresh2(const Maxwell& regs,
+                                                Maxwell::PrimitiveTopology topology_,
+                                                bool base_feautures_supported) {
+    // The logic op is packed unconditionally.
+    logic_op.Assign(PackLogicOp(regs.logic_op.op));
+
+    if (!base_feautures_supported) {
+        // Point / line / fill polygon offset enables, selected through the topology LUT.
+        const std::array offset_enables{
+            regs.polygon_offset_point_enable,
+            regs.polygon_offset_line_enable,
+            regs.polygon_offset_fill_enable,
+        };
+        const u32 lut_index = static_cast<u32>(topology_);
+        const bool bias_on = offset_enables[POLYGON_OFFSET_ENABLE_LUT[lut_index]] != 0;
+
+        rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
+        primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
+        depth_bias_enable.Assign(bias_on ? 1 : 0);
+    }
+}
+
+/// Packs the logic-op enable and the depth-clamp-disabled flag from the Maxwell registers.
+/// Depth clamp is flagged as disabled for the Passthrough, FrustumXYZ and FrustumZ
+/// geometry clip modes.
+void FixedPipelineState::DynamicState::Refresh3(const Maxwell& regs) {
+    using GeometryClip = Maxwell::ViewportClipControl::GeometryClip;
+
+    const auto& clip_mode = regs.viewport_clip_control.geometry_clip;
+    const bool clamp_off = clip_mode == GeometryClip::Passthrough ||
+                           clip_mode == GeometryClip::FrustumXYZ ||
+                           clip_mode == GeometryClip::FrustumZ;
+
+    depth_clamp_disabled.Assign(clamp_off);
+    logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
+}
size_t FixedPipelineState::Hash() const noexcept {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index ab79fb8f3..98ea20b42 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -17,6 +17,15 @@ namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+/// Flags describing which optional dynamic-state features are available.
+/// FixedPipelineState::Refresh copies each flag into the pipeline key.
+/// Every flag defaults to false so a default-constructed instance never holds
+/// indeterminate values; the struct remains an aggregate usable with designated initializers.
+struct DynamicFeatures {
+    bool has_extended_dynamic_state{};
+    bool has_extended_dynamic_state_2{};
+    bool has_extended_dynamic_state_2_extra{};
+    bool has_extended_dynamic_state_3_blend{};
+    bool has_extended_dynamic_state_3_enables{};
+    bool has_dynamic_vertex_input{};
+};
+
struct FixedPipelineState {
static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;
@@ -133,6 +142,17 @@ struct FixedPipelineState {
struct DynamicState {
union {
u32 raw1;
+ BitField<0, 2, u32> cull_face;
+ BitField<2, 1, u32> cull_enable;
+ BitField<3, 1, u32> primitive_restart_enable;
+ BitField<4, 1, u32> depth_bias_enable;
+ BitField<5, 1, u32> rasterize_enable;
+ BitField<6, 4, u32> logic_op;
+ BitField<10, 1, u32> logic_op_enable;
+ BitField<11, 1, u32> depth_clamp_disabled;
+ };
+ union {
+ u32 raw2;
StencilFace<0> front;
StencilFace<12> back;
BitField<24, 1, u32> stencil_enable;
@@ -142,15 +162,11 @@ struct FixedPipelineState {
BitField<28, 1, u32> front_face;
BitField<29, 3, u32> depth_test_func;
};
- union {
- u32 raw2;
- BitField<0, 2, u32> cull_face;
- BitField<2, 1, u32> cull_enable;
- };
- // Vertex stride is a 12 bits value, we have 4 bits to spare per element
- std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
void Refresh(const Maxwell& regs);
+ void Refresh2(const Maxwell& regs, Maxwell::PrimitiveTopology topology,
+ bool base_feautures_supported);
+ void Refresh3(const Maxwell& regs);
Maxwell::ComparisonOp DepthTestFunc() const noexcept {
return UnpackComparisonOp(depth_test_func);
@@ -168,25 +184,24 @@ struct FixedPipelineState {
union {
u32 raw1;
BitField<0, 1, u32> extended_dynamic_state;
- BitField<1, 1, u32> dynamic_vertex_input;
- BitField<2, 1, u32> xfb_enabled;
- BitField<3, 1, u32> primitive_restart_enable;
- BitField<4, 1, u32> depth_bias_enable;
- BitField<5, 1, u32> depth_clamp_disabled;
- BitField<6, 1, u32> ndc_minus_one_to_one;
- BitField<7, 2, u32> polygon_mode;
- BitField<9, 5, u32> patch_control_points_minus_one;
- BitField<14, 2, u32> tessellation_primitive;
- BitField<16, 2, u32> tessellation_spacing;
- BitField<18, 1, u32> tessellation_clockwise;
- BitField<19, 1, u32> logic_op_enable;
- BitField<20, 4, u32> logic_op;
+ BitField<1, 1, u32> extended_dynamic_state_2;
+ BitField<2, 1, u32> extended_dynamic_state_2_extra;
+ BitField<3, 1, u32> extended_dynamic_state_3_blend;
+ BitField<4, 1, u32> extended_dynamic_state_3_enables;
+ BitField<5, 1, u32> dynamic_vertex_input;
+ BitField<6, 1, u32> xfb_enabled;
+ BitField<7, 1, u32> ndc_minus_one_to_one;
+ BitField<8, 2, u32> polygon_mode;
+ BitField<10, 2, u32> tessellation_primitive;
+ BitField<12, 2, u32> tessellation_spacing;
+ BitField<14, 1, u32> tessellation_clockwise;
+ BitField<15, 5, u32> patch_control_points_minus_one;
+
BitField<24, 4, Maxwell::PrimitiveTopology> topology;
BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
};
union {
u32 raw2;
- BitField<0, 1, u32> rasterize_enable;
BitField<1, 3, u32> alpha_test_func;
BitField<4, 1, u32> early_z;
BitField<5, 1, u32> depth_enabled;
@@ -197,25 +212,28 @@ struct FixedPipelineState {
BitField<14, 1, u32> smooth_lines;
BitField<15, 1, u32> alpha_to_coverage_enabled;
BitField<16, 1, u32> alpha_to_one_enabled;
+ BitField<17, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage;
};
std::array<u8, Maxwell::NumRenderTargets> color_formats;
u32 alpha_test_ref;
u32 point_size;
- std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
std::array<u16, Maxwell::NumViewports> viewport_swizzles;
union {
u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state
u64 enabled_divisors;
};
+
+ DynamicState dynamic_state;
+ std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
+ // Vertex stride is a 12 bits value, we have 4 bits to spare per element
+ std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
- DynamicState dynamic_state;
VideoCommon::TransformFeedbackState xfb_state;
- void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state,
- bool has_dynamic_vertex_input);
+ void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFeatures& features);
size_t Hash() const noexcept;
@@ -230,13 +248,17 @@ struct FixedPipelineState {
// When transform feedback is enabled, use the whole struct
return sizeof(*this);
}
- if (dynamic_vertex_input) {
+ if (dynamic_vertex_input && extended_dynamic_state_3_blend) {
// Exclude dynamic state and attributes
+ return offsetof(FixedPipelineState, dynamic_state);
+ }
+ if (dynamic_vertex_input) {
+ // Exclude vertex attributes, binding divisors and vertex strides
return offsetof(FixedPipelineState, attributes);
}
if (extended_dynamic_state) {
// Exclude dynamic state
- return offsetof(FixedPipelineState, dynamic_state);
+ return offsetof(FixedPipelineState, vertex_strides);
}
// Default
return offsetof(FixedPipelineState, xfb_state);
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 6b54d7111..487d8b416 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -56,7 +56,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
- VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
+ VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
if (device.IsExtTransformFeedbackSupported()) {
flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
@@ -516,6 +517,7 @@ void BufferCacheRuntime::ReserveNullBuffer() {
if (device.IsExtTransformFeedbackSupported()) {
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
+ create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
null_buffer = device.GetLogical().CreateBuffer(create_info);
if (device.HasDebuggingToolAttached()) {
null_buffer.SetObjectNameEXT("Null buffer");
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 515d8d869..d11383bf1 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -201,6 +201,22 @@ struct SimpleVertexSpec {
static constexpr bool has_images = false;
};
+// Candidate spec passed to FindSpec in ConfigureFunc: only the first and last of the five
+// stage slots are enabled, and storage buffers are the only resource kind flagged as used.
+// NOTE(review): the enabled slots are presumably vertex and fragment - confirm.
+struct SimpleStorageSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
+ static constexpr bool has_storage_buffers = true;
+ static constexpr bool has_texture_buffers = false;
+ static constexpr bool has_image_buffers = false;
+ static constexpr bool has_images = false;
+};
+
+// Candidate spec passed to FindSpec in ConfigureFunc: only the first and last of the five
+// stage slots are enabled, and images are the only resource kind flagged as used.
+// NOTE(review): the enabled slots are presumably vertex and fragment - confirm.
+struct SimpleImageSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
+ static constexpr bool has_storage_buffers = false;
+ static constexpr bool has_texture_buffers = false;
+ static constexpr bool has_image_buffers = false;
+ static constexpr bool has_images = true;
+};
+
struct DefaultSpec {
static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
static constexpr bool has_storage_buffers = true;
@@ -211,7 +227,8 @@ struct DefaultSpec {
+// Returns the configure function for a pipeline by handing the candidate specs to FindSpec
+// together with the per-stage shader modules and infos.
+// NOTE(review): presumably FindSpec tries the candidates in the order listed, which would be
+// why DefaultSpec stays last - confirm against FindSpec.
ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
const std::array<Shader::Info, NUM_STAGES>& infos) {
- return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(modules, infos);
+ return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, SimpleStorageSpec, SimpleImageSpec,
+ DefaultSpec>(modules, infos);
}
} // Anonymous namespace
@@ -524,6 +541,8 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
FixedPipelineState::DynamicState dynamic{};
if (!key.state.extended_dynamic_state) {
dynamic = key.state.dynamic_state;
+ } else {
+ dynamic.raw1 = key.state.dynamic_state.raw1;
}
static_vector<VkVertexInputBindingDescription, 32> vertex_bindings;
static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
@@ -561,7 +580,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
vertex_bindings.push_back({
.binding = static_cast<u32>(index),
- .stride = dynamic.vertex_strides[index],
+ .stride = key.state.vertex_strides[index],
.inputRate = rate,
});
if (instanced) {
@@ -625,7 +644,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.pNext = nullptr,
.flags = 0,
.topology = input_assembly_topology,
- .primitiveRestartEnable = key.state.primitive_restart_enable != 0 &&
+ .primitiveRestartEnable = dynamic.primitive_restart_enable != 0 &&
((input_assembly_topology != VK_PRIMITIVE_TOPOLOGY_PATCH_LIST &&
device.IsTopologyListPrimitiveRestartSupported()) ||
SupportsPrimitiveRestart(input_assembly_topology) ||
@@ -672,15 +691,15 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.pNext = nullptr,
.flags = 0,
.depthClampEnable =
- static_cast<VkBool32>(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
+ static_cast<VkBool32>(dynamic.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
.rasterizerDiscardEnable =
- static_cast<VkBool32>(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
+ static_cast<VkBool32>(dynamic.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
.polygonMode =
MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)),
.cullMode = static_cast<VkCullModeFlags>(
dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
- .depthBiasEnable = key.state.depth_bias_enable,
+ // Unlike the two inverted-sense fields above, depth bias is enabled when the flag is set.
+ .depthBiasEnable = (dynamic.depth_bias_enable != 0 ? VK_TRUE : VK_FALSE),
.depthBiasConstantFactor = 0.0f,
.depthBiasClamp = 0.0f,
.depthBiasSlopeFactor = 0.0f,
@@ -782,13 +801,13 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .logicOpEnable = key.state.logic_op_enable != 0,
- .logicOp = static_cast<VkLogicOp>(key.state.logic_op.Value()),
+ .logicOpEnable = dynamic.logic_op_enable != 0,
+ .logicOp = static_cast<VkLogicOp>(dynamic.logic_op.Value()),
.attachmentCount = static_cast<u32>(cb_attachments.size()),
.pAttachments = cb_attachments.data(),
.blendConstants = {},
};
- static_vector<VkDynamicState, 19> dynamic_states{
+ static_vector<VkDynamicState, 28> dynamic_states{
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
@@ -811,6 +830,32 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
}
dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
+ if (key.state.extended_dynamic_state_2) {
+ static constexpr std::array extended2{
+ VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT,
+ VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT,
+ VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT,
+ };
+ dynamic_states.insert(dynamic_states.end(), extended2.begin(), extended2.end());
+ }
+ if (key.state.extended_dynamic_state_2_extra) {
+ dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_EXT);
+ }
+ if (key.state.extended_dynamic_state_3_blend) {
+ static constexpr std::array extended3{
+ VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT,
+ VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT,
+ VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT,
+ };
+ dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
+ }
+ if (key.state.extended_dynamic_state_3_enables) {
+ static constexpr std::array extended3{
+ VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT,
+ VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT,
+ };
+ dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
+ }
}
const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index e7262420c..3046b72ab 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment;
using VideoCommon::GenericEnvironment;
using VideoCommon::GraphicsEnvironment;
-constexpr u32 CACHE_VERSION = 8;
+constexpr u32 CACHE_VERSION = 10;
template <typename Container>
auto MakeSpan(Container& container) {
@@ -351,6 +351,15 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
LOG_WARNING(Render_Vulkan, "maxVertexInputBindings is too low: {} < {}",
device.GetMaxVertexInputBindings(), Maxwell::NumVertexArrays);
}
+
+ dynamic_features = DynamicFeatures{
+ .has_extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(),
+ .has_extended_dynamic_state_2 = device.IsExtExtendedDynamicState2Supported(),
+ .has_extended_dynamic_state_2_extra = device.IsExtExtendedDynamicState2ExtrasSupported(),
+ .has_extended_dynamic_state_3_blend = device.IsExtExtendedDynamicState3BlendingSupported(),
+ .has_extended_dynamic_state_3_enables = device.IsExtExtendedDynamicState3EnablesSupported(),
+ .has_dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(),
+ };
}
PipelineCache::~PipelineCache() = default;
@@ -362,8 +371,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
current_pipeline = nullptr;
return nullptr;
}
- graphics_key.state.Refresh(*maxwell3d, device.IsExtExtendedDynamicStateSupported(),
- device.IsExtVertexInputDynamicStateSupported());
+ graphics_key.state.Refresh(*maxwell3d, dynamic_features);
if (current_pipeline) {
GraphicsPipeline* const next{current_pipeline->Next(graphics_key)};
@@ -439,14 +447,21 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
});
++state.total;
}};
- const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported();
- const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported();
const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
GraphicsPipelineCacheKey key;
file.read(reinterpret_cast<char*>(&key), sizeof(key));
- if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state ||
- (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) {
+ if ((key.state.extended_dynamic_state != 0) !=
+ dynamic_features.has_extended_dynamic_state ||
+ (key.state.extended_dynamic_state_2 != 0) !=
+ dynamic_features.has_extended_dynamic_state_2 ||
+ (key.state.extended_dynamic_state_2_extra != 0) !=
+ dynamic_features.has_extended_dynamic_state_2_extra ||
+ (key.state.extended_dynamic_state_3_blend != 0) !=
+ dynamic_features.has_extended_dynamic_state_3_blend ||
+ (key.state.extended_dynamic_state_3_enables != 0) !=
+ dynamic_features.has_extended_dynamic_state_3_enables ||
+ (key.state.dynamic_vertex_input != 0) != dynamic_features.has_dynamic_vertex_input) {
return;
}
workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 61f9e9366..b4f593ef5 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -160,6 +160,7 @@ private:
Common::ThreadWorker workers;
Common::ThreadWorker serialization_thread;
+ DynamicFeatures dynamic_features;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index ac1eb9895..242bf9602 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -180,7 +180,8 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
RasterizerVulkan::~RasterizerVulkan() = default;
-void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
+template <typename Func>
+void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
MICROPROFILE_SCOPE(Vulkan_Drawing);
SCOPE_EXIT({ gpu.TickWork(); });
@@ -201,20 +202,67 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
UpdateDynamicStates();
- const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
- const u32 num_instances{instance_count};
- const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)};
- scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
- if (draw_params.is_indexed) {
- cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
- draw_params.first_index, draw_params.base_vertex,
- draw_params.base_instance);
- } else {
- cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
- draw_params.base_vertex, draw_params.base_instance);
+ draw_func();
+
+ EndTransformFeedback();
+}
+
+/// Issues a direct (non-indirect) draw. The shared setup and teardown live in PrepareDraw;
+/// this function only supplies the callback that records the draw command.
+/// @param is_indexed Selects DrawIndexed over Draw.
+/// @param instance_count Number of instances forwarded to MakeDrawParams.
+void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
+    const auto record_draw = [this, is_indexed, instance_count] {
+        const auto& state = maxwell3d->draw_manager->GetDrawState();
+        const DrawParams params{MakeDrawParams(state, instance_count, is_indexed)};
+        // The parameters are copied into the recorded command so they outlive this call.
+        scheduler.Record([params](vk::CommandBuffer cmdbuf) {
+            if (!params.is_indexed) {
+                cmdbuf.Draw(params.num_vertices, params.num_instances, params.base_vertex,
+                            params.base_instance);
+                return;
+            }
+            cmdbuf.DrawIndexed(params.num_vertices, params.num_instances, params.first_index,
+                               params.base_vertex, params.base_instance);
+        });
+    };
+    PrepareDraw(is_indexed, record_draw);
+}
+
+// Issues an indirect draw using the parameters reported by the draw manager.
+// The parameters are registered with the buffer cache before PrepareDraw runs and cleared
+// again once the draw has been recorded.
+void RasterizerVulkan::DrawIndirect() {
+ const auto& params = maxwell3d->draw_manager->GetIndirectParams();
+ buffer_cache.SetDrawIndirect(&params);
+ PrepareDraw(params.is_indexed, [this, &params] {
+ // The buffer cache hands back a (buffer, offset) pair for the indirect arguments.
+ const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer();
+ const auto& buffer = indirect_buffer.first;
+ const auto& offset = indirect_buffer.second;
+ if (params.include_count) {
+ // Count variant: a second (buffer, offset) pair locates the draw count.
+ const auto count = buffer_cache.GetDrawIndirectCount();
+ const auto& draw_buffer = count.first;
+ const auto& offset_base = count.second;
+ // Handles, offsets and params are captured by value for the recorded command.
+ scheduler.Record([draw_buffer_obj = draw_buffer->Handle(),
+ buffer_obj = buffer->Handle(), offset_base, offset,
+ params](vk::CommandBuffer cmdbuf) {
+ if (params.is_indexed) {
+ cmdbuf.DrawIndexedIndirectCount(
+ buffer_obj, offset, draw_buffer_obj, offset_base,
+ static_cast<u32>(params.max_draw_counts), static_cast<u32>(params.stride));
+ } else {
+ cmdbuf.DrawIndirectCount(buffer_obj, offset, draw_buffer_obj, offset_base,
+ static_cast<u32>(params.max_draw_counts),
+ static_cast<u32>(params.stride));
+ }
+ });
+ return;
}
+ // No count buffer: max_draw_counts is passed as the draw count.
+ scheduler.Record([buffer_obj = buffer->Handle(), offset, params](vk::CommandBuffer cmdbuf) {
+ if (params.is_indexed) {
+ cmdbuf.DrawIndexedIndirect(buffer_obj, offset,
+ static_cast<u32>(params.max_draw_counts),
+ static_cast<u32>(params.stride));
+ } else {
+ cmdbuf.DrawIndirect(buffer_obj, offset, static_cast<u32>(params.max_draw_counts),
+ static_cast<u32>(params.stride));
+ }
+ });
});
- EndTransformFeedback();
+ buffer_cache.SetDrawIndirect(nullptr);
}
void RasterizerVulkan::Clear(u32 layer_count) {
@@ -379,44 +427,58 @@ void Vulkan::RasterizerVulkan::DisableGraphicsUniformBuffer(size_t stage, u32 in
void RasterizerVulkan::FlushAll() {}
-void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
+// Downloads the given region from each cache selected in `which` (texture, buffer, query).
+// Does nothing when addr or size is zero. The texture and buffer caches are each accessed
+// under their own mutex.
+void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
if (addr == 0 || size == 0) {
return;
}
- {
+ if (True(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
texture_cache.DownloadMemory(addr, size);
}
- {
+ if ((True(which & VideoCommon::CacheType::BufferCache))) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.DownloadMemory(addr, size);
}
- query_cache.FlushRegion(addr, size);
+ if ((True(which & VideoCommon::CacheType::QueryCache))) {
+ query_cache.FlushRegion(addr, size);
+ }
}
-bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
- std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
+// Reports whether the region has GPU-modified data in one of the caches selected in `which`.
+// The buffer cache is checked first. The texture cache is only consulted when the GPU
+// accuracy level is high; otherwise the result is false once the buffer check has passed.
+bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
+ if ((True(which & VideoCommon::CacheType::BufferCache))) {
+ std::scoped_lock lock{buffer_cache.mutex};
+ if (buffer_cache.IsRegionGpuModified(addr, size)) {
+ return true;
+ }
+ }
if (!Settings::IsGPULevelHigh()) {
- return buffer_cache.IsRegionGpuModified(addr, size);
+ return false;
+ }
+ if (True(which & VideoCommon::CacheType::TextureCache)) {
+ std::scoped_lock lock{texture_cache.mutex};
+ return texture_cache.IsRegionGpuModified(addr, size);
}
- return texture_cache.IsRegionGpuModified(addr, size) ||
- buffer_cache.IsRegionGpuModified(addr, size);
+ return false;
}
-void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
+// Notifies each cache selected in `which` (texture, buffer, query, shader) that the region
+// was written. Does nothing when addr or size is zero. The texture and buffer caches are
+// each accessed under their own mutex.
+void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
if (addr == 0 || size == 0) {
return;
}
- {
+ if (True(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
- {
+ if ((True(which & VideoCommon::CacheType::BufferCache))) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
- pipeline_cache.InvalidateRegion(addr, size);
- query_cache.InvalidateRegion(addr, size);
+ if ((True(which & VideoCommon::CacheType::QueryCache))) {
+ query_cache.InvalidateRegion(addr, size);
+ }
+ if ((True(which & VideoCommon::CacheType::ShaderCache))) {
+ pipeline_cache.InvalidateRegion(addr, size);
+ }
}
void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
@@ -481,11 +543,12 @@ void RasterizerVulkan::ReleaseFences() {
fence_manager.WaitPendingFences();
}
-void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size,
+ VideoCommon::CacheType which) {
if (Settings::IsGPULevelExtreme()) {
- FlushRegion(addr, size);
+ FlushRegion(addr, size, which);
}
- InvalidateRegion(addr, size);
+ InvalidateRegion(addr, size, which);
}
void RasterizerVulkan::WaitForIdle() {
@@ -541,6 +604,21 @@ void RasterizerVulkan::TickFrame() {
}
}
+bool RasterizerVulkan::AccelerateConditionalRendering() {
+ if (Settings::IsGPULevelHigh()) {
+ // TODO(Blinkhawk): Reimplement Host conditional rendering.
+ return false;
+ }
+ // Medium / Low Hack: stub any checks on queries written into the buffer cache.
+ const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
+ Maxwell::ReportSemaphore::Compare cmp;
+ if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
+ VideoCommon::CacheType::BufferCache)) {
+ return true;
+ }
+ return false;
+}
+
bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
@@ -561,7 +639,7 @@ void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si
}
gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
{
- std::unique_lock<std::mutex> lock{buffer_cache.mutex};
+ std::unique_lock<std::recursive_mutex> lock{buffer_cache.mutex};
if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
buffer_cache.WriteMemory(*cpu_addr, copy_size);
}
@@ -639,16 +717,35 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateLineWidth(regs);
if (device.IsExtExtendedDynamicStateSupported()) {
UpdateCullMode(regs);
- UpdateDepthBoundsTestEnable(regs);
- UpdateDepthTestEnable(regs);
- UpdateDepthWriteEnable(regs);
UpdateDepthCompareOp(regs);
UpdateFrontFace(regs);
UpdateStencilOp(regs);
- UpdateStencilTestEnable(regs);
+
if (device.IsExtVertexInputDynamicStateSupported()) {
UpdateVertexInput(regs);
}
+
+ if (state_tracker.TouchStateEnable()) {
+ UpdateDepthBoundsTestEnable(regs);
+ UpdateDepthTestEnable(regs);
+ UpdateDepthWriteEnable(regs);
+ UpdateStencilTestEnable(regs);
+ if (device.IsExtExtendedDynamicState2Supported()) {
+ UpdatePrimitiveRestartEnable(regs);
+ UpdateRasterizerDiscardEnable(regs);
+ UpdateDepthBiasEnable(regs);
+ }
+ if (device.IsExtExtendedDynamicState3EnablesSupported()) {
+ UpdateLogicOpEnable(regs);
+ UpdateDepthClampEnable(regs);
+ }
+ }
+ if (device.IsExtExtendedDynamicState2ExtrasSupported()) {
+ UpdateLogicOp(regs);
+ }
+ if (device.IsExtExtendedDynamicState3Supported()) {
+ UpdateBlending(regs);
+ }
}
}
@@ -789,32 +886,92 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
if (!state_tracker.TouchStencilProperties()) {
return;
}
- if (regs.stencil_two_side_enable) {
- // Separate values per face
- scheduler.Record(
- [front_ref = regs.stencil_front_ref, front_write_mask = regs.stencil_front_mask,
- front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_ref,
- back_write_mask = regs.stencil_back_mask,
- back_test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) {
+ bool update_references = state_tracker.TouchStencilReference();
+ bool update_write_mask = state_tracker.TouchStencilWriteMask();
+ bool update_compare_masks = state_tracker.TouchStencilCompare();
+ if (state_tracker.TouchStencilSide(regs.stencil_two_side_enable != 0)) {
+ update_references = true;
+ update_write_mask = true;
+ update_compare_masks = true;
+ }
+ if (update_references) {
+ [&]() {
+ if (regs.stencil_two_side_enable) {
+ if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref) &&
+ !state_tracker.CheckStencilReferenceBack(regs.stencil_back_ref)) {
+ return;
+ }
+ } else {
+ if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref)) {
+ return;
+ }
+ }
+ scheduler.Record([front_ref = regs.stencil_front_ref, back_ref = regs.stencil_back_ref,
+ two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
+ const bool set_back = two_sided && front_ref != back_ref;
// Front face
- cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_BIT, front_ref);
- cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_BIT, front_write_mask);
- cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_BIT, front_test_mask);
-
- // Back face
- cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
- cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
- cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
+ cmdbuf.SetStencilReference(set_back ? VK_STENCIL_FACE_FRONT_BIT
+ : VK_STENCIL_FACE_FRONT_AND_BACK,
+ front_ref);
+ if (set_back) {
+ cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
+ }
});
- } else {
- // Front face defines both faces
- scheduler.Record([ref = regs.stencil_front_ref, write_mask = regs.stencil_front_mask,
- test_mask = regs.stencil_front_func_mask](vk::CommandBuffer cmdbuf) {
- cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, ref);
- cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, write_mask);
- cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, test_mask);
- });
+ }();
+ }
+ if (update_write_mask) {
+ [&]() {
+ if (regs.stencil_two_side_enable) {
+ if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask) &&
+ !state_tracker.CheckStencilWriteMaskBack(regs.stencil_back_mask)) {
+ return;
+ }
+ } else {
+ if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask)) {
+ return;
+ }
+ }
+ scheduler.Record([front_write_mask = regs.stencil_front_mask,
+ back_write_mask = regs.stencil_back_mask,
+ two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
+ const bool set_back = two_sided && front_write_mask != back_write_mask;
+ // Front face
+ cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
+ : VK_STENCIL_FACE_FRONT_AND_BACK,
+ front_write_mask);
+ if (set_back) {
+ cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
+ }
+ });
+ }();
+ }
+ if (update_compare_masks) {
+ [&]() {
+ if (regs.stencil_two_side_enable) {
+ if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask) &&
+ !state_tracker.CheckStencilCompareMaskBack(regs.stencil_back_func_mask)) {
+ return;
+ }
+ } else {
+ if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask)) {
+ return;
+ }
+ }
+ scheduler.Record([front_test_mask = regs.stencil_front_func_mask,
+ back_test_mask = regs.stencil_back_func_mask,
+ two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
+ const bool set_back = two_sided && front_test_mask != back_test_mask;
+ // Front face
+ cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
+ : VK_STENCIL_FACE_FRONT_AND_BACK,
+ front_test_mask);
+ if (set_back) {
+ cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
+ }
+ });
+ }();
}
+ state_tracker.ClearStencilReset();
}
void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) {
@@ -868,6 +1025,82 @@ void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& r
});
}
+void RasterizerVulkan::UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchPrimitiveRestartEnable()) {
+ return;
+ }
+ scheduler.Record([enable = regs.primitive_restart.enabled](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetPrimitiveRestartEnableEXT(enable);
+ });
+}
+
+void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchRasterizerDiscardEnable()) {
+ return;
+ }
+ scheduler.Record([disable = regs.rasterize_enable](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetRasterizerDiscardEnableEXT(disable == 0);
+ });
+}
+
+void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchDepthBiasEnable()) {
+ return;
+ }
+ constexpr size_t POINT = 0;
+ constexpr size_t LINE = 1;
+ constexpr size_t POLYGON = 2;
+ static constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
+ POINT, // Points
+ LINE, // Lines
+ LINE, // LineLoop
+ LINE, // LineStrip
+ POLYGON, // Triangles
+ POLYGON, // TriangleStrip
+ POLYGON, // TriangleFan
+ POLYGON, // Quads
+ POLYGON, // QuadStrip
+ POLYGON, // Polygon
+ LINE, // LinesAdjacency
+ LINE, // LineStripAdjacency
+ POLYGON, // TrianglesAdjacency
+ POLYGON, // TriangleStripAdjacency
+ POLYGON, // Patches
+ };
+ const std::array enabled_lut{
+ regs.polygon_offset_point_enable,
+ regs.polygon_offset_line_enable,
+ regs.polygon_offset_fill_enable,
+ };
+ const u32 topology_index = static_cast<u32>(maxwell3d->draw_manager->GetDrawState().topology);
+ const u32 enable = enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]];
+ scheduler.Record(
+ [enable](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBiasEnableEXT(enable != 0); });
+}
+
+void RasterizerVulkan::UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchLogicOpEnable()) {
+ return;
+ }
+ scheduler.Record([enable = regs.logic_op.enable](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetLogicOpEnableEXT(enable != 0);
+ });
+}
+
+void RasterizerVulkan::UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchDepthClampEnable()) {
+ return;
+ }
+ bool is_enabled = !(regs.viewport_clip_control.geometry_clip ==
+ Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
+ regs.viewport_clip_control.geometry_clip ==
+ Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ ||
+ regs.viewport_clip_control.geometry_clip ==
+ Maxwell::ViewportClipControl::GeometryClip::FrustumZ);
+ scheduler.Record(
+ [is_enabled](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthClampEnableEXT(is_enabled); });
+}
+
void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthCompareOp()) {
return;
@@ -925,6 +1158,78 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
}
}
+void RasterizerVulkan::UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchLogicOp()) {
+ return;
+ }
+ const auto op_value = static_cast<u32>(regs.logic_op.op);
+ auto op = op_value >= 0x1500 && op_value < 0x1510 ? static_cast<VkLogicOp>(op_value - 0x1500)
+ : VK_LOGIC_OP_NO_OP;
+ scheduler.Record([op](vk::CommandBuffer cmdbuf) { cmdbuf.SetLogicOpEXT(op); });
+}
+
+void RasterizerVulkan::UpdateBlending(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchBlending()) {
+ return;
+ }
+
+ if (state_tracker.TouchColorMask()) {
+ std::array<VkColorComponentFlags, Maxwell::NumRenderTargets> setup_masks{};
+ for (size_t index = 0; index < Maxwell::NumRenderTargets; index++) {
+ const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
+ auto& current = setup_masks[index];
+ if (mask.R) {
+ current |= VK_COLOR_COMPONENT_R_BIT;
+ }
+ if (mask.G) {
+ current |= VK_COLOR_COMPONENT_G_BIT;
+ }
+ if (mask.B) {
+ current |= VK_COLOR_COMPONENT_B_BIT;
+ }
+ if (mask.A) {
+ current |= VK_COLOR_COMPONENT_A_BIT;
+ }
+ }
+ scheduler.Record([setup_masks](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetColorWriteMaskEXT(0, setup_masks);
+ });
+ }
+
+ if (state_tracker.TouchBlendEnable()) {
+ std::array<VkBool32, Maxwell::NumRenderTargets> setup_enables{};
+ std::ranges::transform(
+ regs.blend.enable, setup_enables.begin(),
+ [&](const auto& is_enabled) { return is_enabled != 0 ? VK_TRUE : VK_FALSE; });
+ scheduler.Record([setup_enables](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetColorBlendEnableEXT(0, setup_enables);
+ });
+ }
+
+ if (state_tracker.TouchBlendEquations()) {
+ std::array<VkColorBlendEquationEXT, Maxwell::NumRenderTargets> setup_blends{};
+ for (size_t index = 0; index < Maxwell::NumRenderTargets; index++) {
+ const auto blend_setup = [&]<typename T>(const T& guest_blend) {
+ auto& host_blend = setup_blends[index];
+ host_blend.srcColorBlendFactor = MaxwellToVK::BlendFactor(guest_blend.color_source);
+ host_blend.dstColorBlendFactor = MaxwellToVK::BlendFactor(guest_blend.color_dest);
+ host_blend.colorBlendOp = MaxwellToVK::BlendEquation(guest_blend.color_op);
+ host_blend.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(guest_blend.alpha_source);
+ host_blend.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(guest_blend.alpha_dest);
+ host_blend.alphaBlendOp = MaxwellToVK::BlendEquation(guest_blend.alpha_op);
+ };
+ if (!regs.blend_per_target_enabled) {
+ blend_setup(regs.blend);
+ continue;
+ }
+ blend_setup(regs.blend_per_target[index]);
+ }
+ scheduler.Record([setup_blends](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetColorBlendEquationEXT(0, setup_blends);
+ });
+ }
+}
+
void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchStencilTestEnable()) {
return;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index ee483cfd9..c661e5b19 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -65,6 +65,7 @@ public:
~RasterizerVulkan() override;
void Draw(bool is_indexed, u32 instance_count) override;
+ void DrawIndirect() override;
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
@@ -72,9 +73,12 @@ public:
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
void FlushAll() override;
- void FlushRegion(VAddr addr, u64 size) override;
- bool MustFlushRegion(VAddr addr, u64 size) override;
- void InvalidateRegion(VAddr addr, u64 size) override;
+ void FlushRegion(VAddr addr, u64 size,
+ VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
+ bool MustFlushRegion(VAddr addr, u64 size,
+ VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
+ void InvalidateRegion(VAddr addr, u64 size,
+ VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
@@ -84,12 +88,14 @@ public:
void SignalSyncPoint(u32 value) override;
void SignalReference() override;
void ReleaseFences() override;
- void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+ void FlushAndInvalidateRegion(
+ VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void WaitForIdle() override;
void FragmentBarrier() override;
void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
+ bool AccelerateConditionalRendering() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
@@ -114,6 +120,9 @@ private:
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
+ template <typename Func>
+ void PrepareDraw(bool is_indexed, Func&&);
+
void FlushWork();
void UpdateDynamicStates();
@@ -135,9 +144,16 @@ private:
void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateBlending(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 06f68d09a..74ca77216 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -1,5 +1,5 @@
-// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
#include <algorithm>
#include <utility>
@@ -94,7 +94,8 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.flags = 0,
.size = STREAM_BUFFER_SIZE,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
- VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
@@ -142,11 +143,23 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
StagingBufferPool::~StagingBufferPool() = default;
-StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
- if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
+StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
+ if (!deferred && usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
return GetStreamBuffer(size);
}
- return GetStagingBuffer(size, usage);
+ return GetStagingBuffer(size, usage, deferred);
+}
+
+void StagingBufferPool::FreeDeferred(StagingBufferRef& ref) {
+ auto& entries = GetCache(ref.usage)[ref.log2_level].entries;
+ const auto is_this_one = [&ref](const StagingBuffer& entry) {
+ return entry.index == ref.index;
+ };
+ auto it = std::find_if(entries.begin(), entries.end(), is_this_one);
+ ASSERT(it != entries.end());
+ ASSERT(it->deferred);
+ it->tick = scheduler.CurrentTick();
+ it->deferred = false;
}
void StagingBufferPool::TickFrame() {
@@ -187,6 +200,9 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
.buffer = *stream_buffer,
.offset = static_cast<VkDeviceSize>(offset),
.mapped_span = std::span<u8>(stream_pointer + offset, size),
+ .usage{},
+ .log2_level{},
+ .index{},
};
}
@@ -196,19 +212,21 @@ bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end)
[gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; });
};
-StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) {
- if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
+StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage,
+ bool deferred) {
+ if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage, deferred)) {
return *ref;
}
- return CreateStagingBuffer(size, usage);
+ return CreateStagingBuffer(size, usage, deferred);
}
std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
- MemoryUsage usage) {
+ MemoryUsage usage,
+ bool deferred) {
StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
const auto is_free = [this](const StagingBuffer& entry) {
- return scheduler.IsFree(entry.tick);
+ return !entry.deferred && scheduler.IsFree(entry.tick);
};
auto& entries = cache_level.entries;
const auto hint_it = entries.begin() + cache_level.iterate_index;
@@ -220,11 +238,14 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s
}
}
cache_level.iterate_index = std::distance(entries.begin(), it) + 1;
- it->tick = scheduler.CurrentTick();
+ it->tick = deferred ? std::numeric_limits<u64>::max() : scheduler.CurrentTick();
+ ASSERT(!it->deferred);
+ it->deferred = deferred;
return it->Ref();
}
-StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage) {
+StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage,
+ bool deferred) {
const u32 log2 = Common::Log2Ceil64(size);
vk::Buffer buffer = device.GetLogical().CreateBuffer({
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
@@ -233,7 +254,8 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage
.size = 1ULL << log2,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
- VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
+ VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
@@ -249,7 +271,11 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage
.buffer = std::move(buffer),
.commit = std::move(commit),
.mapped_span = mapped_span,
- .tick = scheduler.CurrentTick(),
+ .usage = usage,
+ .log2_level = log2,
+ .index = unique_ids++,
+ .tick = deferred ? std::numeric_limits<u64>::max() : scheduler.CurrentTick(),
+ .deferred = deferred,
});
return entry.Ref();
}
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 91dc84da8..4fd15f11a 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -1,5 +1,5 @@
-// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
@@ -20,6 +20,9 @@ struct StagingBufferRef {
VkBuffer buffer;
VkDeviceSize offset;
std::span<u8> mapped_span;
+ MemoryUsage usage;
+ u32 log2_level;
+ u64 index;
};
class StagingBufferPool {
@@ -30,7 +33,8 @@ public:
Scheduler& scheduler);
~StagingBufferPool();
- StagingBufferRef Request(size_t size, MemoryUsage usage);
+ StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false);
+ void FreeDeferred(StagingBufferRef& ref);
void TickFrame();
@@ -44,13 +48,20 @@ private:
vk::Buffer buffer;
MemoryCommit commit;
std::span<u8> mapped_span;
+ MemoryUsage usage;
+ u32 log2_level;
+ u64 index;
u64 tick = 0;
+ bool deferred{};
StagingBufferRef Ref() const noexcept {
return {
.buffer = *buffer,
.offset = 0,
.mapped_span = mapped_span,
+ .usage = usage,
+ .log2_level = log2_level,
+ .index = index,
};
}
};
@@ -68,11 +79,12 @@ private:
bool AreRegionsActive(size_t region_begin, size_t region_end) const;
- StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage);
+ StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage, bool deferred = false);
- std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
+ std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage,
+ bool deferred);
- StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
+ StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage, bool deferred);
StagingBuffersCache& GetCache(MemoryUsage usage);
@@ -99,6 +111,7 @@ private:
size_t current_delete_level = 0;
u64 buffer_index = 0;
+ u64 unique_ids{};
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index edb41b171..d56558a83 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -27,10 +27,37 @@ using Flags = Maxwell3D::DirtyState::Flags;
Flags MakeInvalidationFlags() {
static constexpr int INVALIDATION_FLAGS[]{
- Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
- StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable,
- DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
- VertexBuffers, VertexInput,
+ Viewports,
+ Scissors,
+ DepthBias,
+ BlendConstants,
+ DepthBounds,
+ StencilProperties,
+ StencilReference,
+ StencilWriteMask,
+ StencilCompare,
+ LineWidth,
+ CullMode,
+ DepthBoundsEnable,
+ DepthTestEnable,
+ DepthWriteEnable,
+ DepthCompareOp,
+ FrontFace,
+ StencilOp,
+ StencilTestEnable,
+ VertexBuffers,
+ VertexInput,
+ StateEnable,
+ PrimitiveRestartEnable,
+ RasterizerDiscardEnable,
+ DepthBiasEnable,
+ LogicOpEnable,
+ DepthClampEnable,
+ LogicOp,
+ Blending,
+ ColorMask,
+ BlendEquations,
+ BlendEnable,
};
Flags flags{};
for (const int flag : INVALIDATION_FLAGS) {
@@ -75,14 +102,17 @@ void SetupDirtyDepthBounds(Tables& tables) {
}
void SetupDirtyStencilProperties(Tables& tables) {
- auto& table = tables[0];
- table[OFF(stencil_two_side_enable)] = StencilProperties;
- table[OFF(stencil_front_ref)] = StencilProperties;
- table[OFF(stencil_front_mask)] = StencilProperties;
- table[OFF(stencil_front_func_mask)] = StencilProperties;
- table[OFF(stencil_back_ref)] = StencilProperties;
- table[OFF(stencil_back_mask)] = StencilProperties;
- table[OFF(stencil_back_func_mask)] = StencilProperties;
+ const auto setup = [&](size_t position, u8 flag) {
+ tables[0][position] = flag;
+ tables[1][position] = StencilProperties;
+ };
+ tables[0][OFF(stencil_two_side_enable)] = StencilProperties;
+ setup(OFF(stencil_front_ref), StencilReference);
+ setup(OFF(stencil_front_mask), StencilWriteMask);
+ setup(OFF(stencil_front_func_mask), StencilCompare);
+ setup(OFF(stencil_back_ref), StencilReference);
+ setup(OFF(stencil_back_mask), StencilWriteMask);
+ setup(OFF(stencil_back_func_mask), StencilCompare);
}
void SetupDirtyLineWidth(Tables& tables) {
@@ -96,16 +126,22 @@ void SetupDirtyCullMode(Tables& tables) {
table[OFF(gl_cull_test_enabled)] = CullMode;
}
-void SetupDirtyDepthBoundsEnable(Tables& tables) {
- tables[0][OFF(depth_bounds_enable)] = DepthBoundsEnable;
-}
-
-void SetupDirtyDepthTestEnable(Tables& tables) {
- tables[0][OFF(depth_test_enable)] = DepthTestEnable;
-}
-
-void SetupDirtyDepthWriteEnable(Tables& tables) {
- tables[0][OFF(depth_write_enabled)] = DepthWriteEnable;
+void SetupDirtyStateEnable(Tables& tables) {
+ const auto setup = [&](size_t position, u8 flag) {
+ tables[0][position] = flag;
+ tables[1][position] = StateEnable;
+ };
+ setup(OFF(depth_bounds_enable), DepthBoundsEnable);
+ setup(OFF(depth_test_enable), DepthTestEnable);
+ setup(OFF(depth_write_enabled), DepthWriteEnable);
+ setup(OFF(stencil_enable), StencilTestEnable);
+ setup(OFF(primitive_restart.enabled), PrimitiveRestartEnable);
+ setup(OFF(rasterize_enable), RasterizerDiscardEnable);
+ setup(OFF(polygon_offset_point_enable), DepthBiasEnable);
+ setup(OFF(polygon_offset_line_enable), DepthBiasEnable);
+ setup(OFF(polygon_offset_fill_enable), DepthBiasEnable);
+ setup(OFF(logic_op.enable), LogicOpEnable);
+ setup(OFF(viewport_clip_control.geometry_clip), DepthClampEnable);
}
void SetupDirtyDepthCompareOp(Tables& tables) {
@@ -133,16 +169,22 @@ void SetupDirtyStencilOp(Tables& tables) {
tables[1][OFF(stencil_two_side_enable)] = StencilOp;
}
-void SetupDirtyStencilTestEnable(Tables& tables) {
- tables[0][OFF(stencil_enable)] = StencilTestEnable;
-}
-
void SetupDirtyBlending(Tables& tables) {
tables[0][OFF(color_mask_common)] = Blending;
+ tables[1][OFF(color_mask_common)] = ColorMask;
tables[0][OFF(blend_per_target_enabled)] = Blending;
+ tables[1][OFF(blend_per_target_enabled)] = BlendEquations;
FillBlock(tables[0], OFF(color_mask), NUM(color_mask), Blending);
+ FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMask);
FillBlock(tables[0], OFF(blend), NUM(blend), Blending);
+ FillBlock(tables[1], OFF(blend), NUM(blend), BlendEquations);
+ FillBlock(tables[1], OFF(blend.enable), NUM(blend.enable), BlendEnable);
FillBlock(tables[0], OFF(blend_per_target), NUM(blend_per_target), Blending);
+ FillBlock(tables[1], OFF(blend_per_target), NUM(blend_per_target), BlendEquations);
+}
+
+void SetupDirtySpecialOps(Tables& tables) {
+ tables[0][OFF(logic_op.op)] = LogicOp;
}
void SetupDirtyViewportSwizzles(Tables& tables) {
@@ -185,17 +227,15 @@ void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
SetupDirtyStencilProperties(tables);
SetupDirtyLineWidth(tables);
SetupDirtyCullMode(tables);
- SetupDirtyDepthBoundsEnable(tables);
- SetupDirtyDepthTestEnable(tables);
- SetupDirtyDepthWriteEnable(tables);
+ SetupDirtyStateEnable(tables);
SetupDirtyDepthCompareOp(tables);
SetupDirtyFrontFace(tables);
SetupDirtyStencilOp(tables);
- SetupDirtyStencilTestEnable(tables);
SetupDirtyBlending(tables);
SetupDirtyViewportSwizzles(tables);
SetupDirtyVertexAttributes(tables);
SetupDirtyVertexBindings(tables);
+ SetupDirtySpecialOps(tables);
}
void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
@@ -204,6 +244,8 @@ void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
void StateTracker::InvalidateState() {
flags->set();
+ current_topology = INVALID_TOPOLOGY;
+ stencil_reset = true;
}
StateTracker::StateTracker()
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 2296dea60..8010ad26c 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -35,6 +35,9 @@ enum : u8 {
BlendConstants,
DepthBounds,
StencilProperties,
+ StencilReference,
+ StencilWriteMask,
+ StencilCompare,
LineWidth,
CullMode,
@@ -45,8 +48,18 @@ enum : u8 {
FrontFace,
StencilOp,
StencilTestEnable,
+ PrimitiveRestartEnable,
+ RasterizerDiscardEnable,
+ DepthBiasEnable,
+ StateEnable,
+ LogicOp,
+ LogicOpEnable,
+ DepthClampEnable,
Blending,
+ BlendEnable,
+ BlendEquations,
+ ColorMask,
ViewportSwizzles,
Last,
@@ -64,6 +77,7 @@ public:
void InvalidateCommandBufferState() {
(*flags) |= invalidation_flags;
current_topology = INVALID_TOPOLOGY;
+ stencil_reset = true;
}
void InvalidateViewports() {
@@ -103,6 +117,57 @@ public:
return Exchange(Dirty::StencilProperties, false);
}
+ bool TouchStencilReference() {
+ return Exchange(Dirty::StencilReference, false);
+ }
+
+ bool TouchStencilWriteMask() {
+ return Exchange(Dirty::StencilWriteMask, false);
+ }
+
+ bool TouchStencilCompare() {
+ return Exchange(Dirty::StencilCompare, false);
+ }
+
+ template <typename T>
+ bool ExchangeCheck(T& old_value, T new_value) {
+ bool result = old_value != new_value;
+ old_value = new_value;
+ return result;
+ }
+
+ bool TouchStencilSide(bool two_sided_stencil_new) {
+ return ExchangeCheck(two_sided_stencil, two_sided_stencil_new) || stencil_reset;
+ }
+
+ bool CheckStencilReferenceFront(u32 new_value) {
+ return ExchangeCheck(front.ref, new_value) || stencil_reset;
+ }
+
+ bool CheckStencilReferenceBack(u32 new_value) {
+ return ExchangeCheck(back.ref, new_value) || stencil_reset;
+ }
+
+ bool CheckStencilWriteMaskFront(u32 new_value) {
+ return ExchangeCheck(front.write_mask, new_value) || stencil_reset;
+ }
+
+ bool CheckStencilWriteMaskBack(u32 new_value) {
+ return ExchangeCheck(back.write_mask, new_value) || stencil_reset;
+ }
+
+ bool CheckStencilCompareMaskFront(u32 new_value) {
+ return ExchangeCheck(front.compare_mask, new_value) || stencil_reset;
+ }
+
+ bool CheckStencilCompareMaskBack(u32 new_value) {
+ return ExchangeCheck(back.compare_mask, new_value) || stencil_reset;
+ }
+
+ void ClearStencilReset() {
+ stencil_reset = false;
+ }
+
bool TouchLineWidth() const {
return Exchange(Dirty::LineWidth, false);
}
@@ -111,6 +176,10 @@ public:
return Exchange(Dirty::CullMode, false);
}
+ bool TouchStateEnable() {
+ return Exchange(Dirty::StateEnable, false);
+ }
+
bool TouchDepthBoundsTestEnable() {
return Exchange(Dirty::DepthBoundsEnable, false);
}
@@ -123,6 +192,26 @@ public:
return Exchange(Dirty::DepthWriteEnable, false);
}
+ bool TouchPrimitiveRestartEnable() {
+ return Exchange(Dirty::PrimitiveRestartEnable, false);
+ }
+
+ bool TouchRasterizerDiscardEnable() {
+ return Exchange(Dirty::RasterizerDiscardEnable, false);
+ }
+
+ bool TouchDepthBiasEnable() {
+ return Exchange(Dirty::DepthBiasEnable, false);
+ }
+
+ bool TouchLogicOpEnable() {
+ return Exchange(Dirty::LogicOpEnable, false);
+ }
+
+ bool TouchDepthClampEnable() {
+ return Exchange(Dirty::DepthClampEnable, false);
+ }
+
bool TouchDepthCompareOp() {
return Exchange(Dirty::DepthCompareOp, false);
}
@@ -135,10 +224,30 @@ public:
return Exchange(Dirty::StencilOp, false);
}
+ bool TouchBlending() {
+ return Exchange(Dirty::Blending, false);
+ }
+
+ bool TouchBlendEnable() {
+ return Exchange(Dirty::BlendEnable, false);
+ }
+
+ bool TouchBlendEquations() {
+ return Exchange(Dirty::BlendEquations, false);
+ }
+
+ bool TouchColorMask() {
+ return Exchange(Dirty::ColorMask, false);
+ }
+
bool TouchStencilTestEnable() {
return Exchange(Dirty::StencilTestEnable, false);
}
+ bool TouchLogicOp() {
+ return Exchange(Dirty::LogicOp, false);
+ }
+
bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) {
const bool has_changed = current_topology != new_topology;
current_topology = new_topology;
@@ -160,10 +269,20 @@ private:
return is_dirty;
}
+ struct StencilProperties {
+ u32 ref = 0;
+ u32 write_mask = 0;
+ u32 compare_mask = 0;
+ };
+
Tegra::Engines::Maxwell3D::DirtyState::Flags* flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
+ bool two_sided_stencil = false;
+ StencilProperties front{};
+ StencilProperties back{};
+ bool stencil_reset = false;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index a65bbeb1c..d39372ec4 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -812,8 +812,12 @@ StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_buffer_pool.Request(size, MemoryUsage::Upload);
}
-StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
- return staging_buffer_pool.Request(size, MemoryUsage::Download);
+StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
+ return staging_buffer_pool.Request(size, MemoryUsage::Download, deferred);
+}
+
+void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
+ staging_buffer_pool.FreeDeferred(ref);
}
bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 7ec0df134..1f27a3589 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -51,7 +51,9 @@ public:
StagingBufferRef UploadStagingBuffer(size_t size);
- StagingBufferRef DownloadStagingBuffer(size_t size);
+ StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
+
+ void FreeDeferredStagingBuffer(StagingBufferRef& ref);
void TickFrame();
@@ -347,6 +349,7 @@ struct TextureCacheParams {
static constexpr bool FRAMEBUFFER_BLITS = false;
static constexpr bool HAS_EMULATED_COPIES = false;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
+ static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
using Runtime = Vulkan::TextureCacheRuntime;
using Image = Vulkan::Image;
@@ -354,6 +357,7 @@ struct TextureCacheParams {
using ImageView = Vulkan::ImageView;
using Sampler = Vulkan::Sampler;
using Framebuffer = Vulkan::Framebuffer;
+ using AsyncBuffer = Vulkan::StagingBufferRef;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;