summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt15
-rw-r--r--src/video_core/engines/maxwell_3d.cpp261
-rw-r--r--src/video_core/engines/maxwell_3d.h52
-rw-r--r--src/video_core/macro/macro.cpp3
-rw-r--r--src/video_core/macro/macro_hle.cpp47
-rw-r--r--src/video_core/macro/macro_interpreter.cpp2
-rw-r--r--src/video_core/macro/macro_jit_x64.cpp2
-rw-r--r--src/video_core/memory_manager.cpp2
-rw-r--r--src/video_core/rasterizer_interface.h2
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp11
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp27
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.cpp4
-rw-r--r--src/video_core/renderer_vulkan/pipeline_helper.h10
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp22
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp69
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp27
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h11
-rw-r--r--src/video_core/shader_environment.cpp104
-rw-r--r--src/video_core/shader_environment.h21
-rw-r--r--src/video_core/texture_cache/util.cpp1
30 files changed, 458 insertions, 290 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 106991969..d7f7d336c 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -73,8 +73,6 @@ add_library(video_core STATIC
macro/macro_hle.h
macro/macro_interpreter.cpp
macro/macro_interpreter.h
- macro/macro_jit_x64.cpp
- macro/macro_jit_x64.h
fence_manager.h
gpu.cpp
gpu.h
@@ -245,7 +243,7 @@ add_library(video_core STATIC
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak)
+target_link_libraries(video_core PUBLIC glad shader_recompiler)
if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
add_dependencies(video_core ffmpeg-build)
@@ -282,8 +280,19 @@ else()
-Wno-sign-conversion
)
+
+ # xbyak
+ set_source_files_properties(macro/macro_jit_x64.cpp PROPERTIES COMPILE_OPTIONS "-Wno-conversion;-Wno-shadow")
endif()
if (ARCHITECTURE_x86_64)
+ target_sources(video_core PRIVATE
+ macro/macro_jit_x64.cpp
+ macro/macro_jit_x64.h
+ )
+ target_link_libraries(video_core PUBLIC xbyak)
+endif()
+
+if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
target_link_libraries(video_core PRIVATE dynarmic)
endif()
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index b1a22b76c..4a2f2c1fd 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -117,10 +117,15 @@ void Maxwell3D::InitializeRegisterDefaults() {
shadow_state = regs;
- mme_inline[MAXWELL3D_REG_INDEX(draw.end)] = true;
- mme_inline[MAXWELL3D_REG_INDEX(draw.begin)] = true;
- mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
- mme_inline[MAXWELL3D_REG_INDEX(index_buffer.count)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(draw.end)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(draw.begin)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.first)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(index_buffer.first)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(index_buffer.count)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(draw_inline_index)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(inline_index_2x16.even)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(inline_index_4x8.index0)] = true;
}
void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
@@ -208,25 +213,21 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return ProcessCBBind(3);
case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config):
return ProcessCBBind(4);
- case MAXWELL3D_REG_INDEX(draw.end):
- return DrawArrays();
case MAXWELL3D_REG_INDEX(index_buffer32_first):
regs.index_buffer.count = regs.index_buffer32_first.count;
regs.index_buffer.first = regs.index_buffer32_first.first;
dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
- return DrawArrays();
+ return ProcessDraw();
case MAXWELL3D_REG_INDEX(index_buffer16_first):
regs.index_buffer.count = regs.index_buffer16_first.count;
regs.index_buffer.first = regs.index_buffer16_first.first;
dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
- return DrawArrays();
+ return ProcessDraw();
case MAXWELL3D_REG_INDEX(index_buffer8_first):
regs.index_buffer.count = regs.index_buffer8_first.count;
regs.index_buffer.first = regs.index_buffer8_first.first;
dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
- // a macro calls this one over and over, should it increase instancing?
- // Used by Hades and likely other Vulkan games.
- return DrawArrays();
+ return ProcessDraw();
case MAXWELL3D_REG_INDEX(topology_override):
use_topology_override = true;
return;
@@ -261,14 +262,13 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
// Execute the current macro.
macro_engine->Execute(macro_positions[entry], parameters);
- if (mme_draw.current_mode != MMEDrawMode::Undefined) {
- FlushMMEInlineDraw();
- }
+
+ ProcessDeferredDraw();
}
void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
- // It is an error to write to a register other than the current macro's ARG register before it
- // has finished execution.
+ // It is an error to write to a register other than the current macro's ARG register before
+ // it has finished execution.
if (executing_macro != 0) {
ASSERT(method == executing_macro + 1);
}
@@ -283,9 +283,33 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
- const u32 argument = ProcessShadowRam(method, method_argument);
- ProcessDirtyRegisters(method, argument);
- ProcessMethodCall(method, argument, method_argument, is_last_call);
+ if (draw_command[method]) {
+ regs.reg_array[method] = method_argument;
+ deferred_draw_method.push_back(method);
+ auto u32_to_u8 = [&](const u32 argument) {
+ inline_index_draw_indexes.push_back(static_cast<u8>(argument & 0x000000ff));
+ inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0x0000ff00) >> 8));
+ inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0x00ff0000) >> 16));
+ inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0xff000000) >> 24));
+ };
+ if (MAXWELL3D_REG_INDEX(draw_inline_index) == method) {
+ u32_to_u8(method_argument);
+ } else if (MAXWELL3D_REG_INDEX(inline_index_2x16.even) == method) {
+ u32_to_u8(regs.inline_index_2x16.even);
+ u32_to_u8(regs.inline_index_2x16.odd);
+ } else if (MAXWELL3D_REG_INDEX(inline_index_4x8.index0) == method) {
+ u32_to_u8(regs.inline_index_4x8.index0);
+ u32_to_u8(regs.inline_index_4x8.index1);
+ u32_to_u8(regs.inline_index_4x8.index2);
+ u32_to_u8(regs.inline_index_4x8.index3);
+ }
+ } else {
+ ProcessDeferredDraw();
+
+ const u32 argument = ProcessShadowRam(method, method_argument);
+ ProcessDirtyRegisters(method, argument);
+ ProcessMethodCall(method, argument, method_argument, is_last_call);
+ }
}
void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
@@ -326,55 +350,6 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
}
}
-void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) {
- if (mme_draw.current_mode == MMEDrawMode::Undefined) {
- if (mme_draw.gl_begin_consume) {
- mme_draw.current_mode = expected_mode;
- mme_draw.current_count = count;
- mme_draw.instance_count = 1;
- mme_draw.gl_begin_consume = false;
- mme_draw.gl_end_count = 0;
- }
- return;
- } else {
- if (mme_draw.current_mode == expected_mode && count == mme_draw.current_count &&
- mme_draw.instance_mode && mme_draw.gl_begin_consume) {
- mme_draw.instance_count++;
- mme_draw.gl_begin_consume = false;
- return;
- } else {
- FlushMMEInlineDraw();
- }
- }
- // Tail call in case it needs to retry.
- StepInstance(expected_mode, count);
-}
-
-void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
- if (mme_inline[method]) {
- regs.reg_array[method] = method_argument;
- if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) ||
- method == MAXWELL3D_REG_INDEX(index_buffer.count)) {
- const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count)
- ? MMEDrawMode::Array
- : MMEDrawMode::Indexed;
- StepInstance(expected_mode, method_argument);
- } else if (method == MAXWELL3D_REG_INDEX(draw.begin)) {
- mme_draw.instance_mode =
- (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) ||
- (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged);
- mme_draw.gl_begin_consume = true;
- } else {
- mme_draw.gl_end_count++;
- }
- } else {
- if (mme_draw.current_mode != MMEDrawMode::Undefined) {
- FlushMMEInlineDraw();
- }
- CallMethod(method, method_argument, true);
- }
-}
-
void Maxwell3D::ProcessTopologyOverride() {
using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology;
using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride;
@@ -404,41 +379,6 @@ void Maxwell3D::ProcessTopologyOverride() {
}
}
-void Maxwell3D::FlushMMEInlineDraw() {
- LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
- regs.vertex_buffer.count);
- ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?");
- ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
-
- // Both instance configuration registers can not be set at the same time.
- ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First ||
- regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged,
- "Illegal combination of instancing parameters");
-
- ProcessTopologyOverride();
-
- const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
- if (ShouldExecute()) {
- rasterizer->Draw(is_indexed, true);
- }
-
- // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
- // the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
- // it's possible that it is incorrect and that there is some other register used to specify the
- // drawing mode.
- if (is_indexed) {
- regs.index_buffer.count = 0;
- } else {
- regs.vertex_buffer.count = 0;
- }
- mme_draw.current_mode = MMEDrawMode::Undefined;
- mme_draw.current_count = 0;
- mme_draw.instance_count = 0;
- mme_draw.instance_mode = false;
- mme_draw.gl_begin_consume = false;
- mme_draw.gl_end_count = 0;
-}
-
void Maxwell3D::ProcessMacroUpload(u32 data) {
macro_engine->AddCode(regs.load_mme.instruction_ptr++, data);
}
@@ -573,42 +513,6 @@ void Maxwell3D::ProcessSyncPoint() {
rasterizer->SignalSyncPoint(sync_point);
}
-void Maxwell3D::DrawArrays() {
- LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
- regs.vertex_buffer.count);
- ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?");
-
- // Both instance configuration registers can not be set at the same time.
- ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First ||
- regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged,
- "Illegal combination of instancing parameters");
-
- ProcessTopologyOverride();
-
- if (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) {
- // Increment the current instance *before* drawing.
- state.current_instance++;
- } else if (regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged) {
- // Reset the current instance to 0.
- state.current_instance = 0;
- }
-
- const bool is_indexed{regs.index_buffer.count && !regs.vertex_buffer.count};
- if (ShouldExecute()) {
- rasterizer->Draw(is_indexed, false);
- }
-
- // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
- // the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
- // it's possible that it is incorrect and that there is some other register used to specify the
- // drawing mode.
- if (is_indexed) {
- regs.index_buffer.count = 0;
- } else {
- regs.vertex_buffer.count = 0;
- }
-}
-
std::optional<u64> Maxwell3D::GetQueryResult() {
switch (regs.report_semaphore.query.report) {
case Regs::ReportSemaphore::Report::Payload:
@@ -691,4 +595,83 @@ void Maxwell3D::ProcessClearBuffers() {
rasterizer->Clear();
}
+void Maxwell3D::ProcessDraw(u32 instance_count) {
+ LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
+ regs.vertex_buffer.count);
+
+ ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?");
+
+ // Both instance configuration registers can not be set at the same time.
+ ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First ||
+ regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged,
+ "Illegal combination of instancing parameters");
+
+ ProcessTopologyOverride();
+
+ const bool is_indexed = regs.index_buffer.count && !regs.vertex_buffer.count;
+ if (ShouldExecute()) {
+ rasterizer->Draw(is_indexed, instance_count);
+ }
+
+ if (is_indexed) {
+ regs.index_buffer.count = 0;
+ } else {
+ regs.vertex_buffer.count = 0;
+ }
+}
+
+void Maxwell3D::ProcessDeferredDraw() {
+ if (deferred_draw_method.empty()) {
+ return;
+ }
+
+ enum class DrawMode {
+ Undefined,
+ General,
+ Instance,
+ };
+ DrawMode draw_mode{DrawMode::Undefined};
+ u32 method_count = static_cast<u32>(deferred_draw_method.size());
+ u32 method = deferred_draw_method[method_count - 1];
+ if (MAXWELL3D_REG_INDEX(draw.end) != method) {
+ return;
+ }
+ draw_mode = (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) ||
+ (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged)
+ ? DrawMode::Instance
+ : DrawMode::General;
+ u32 instance_count = 0;
+ if (draw_mode == DrawMode::Instance) {
+ u32 vertex_buffer_count = 0;
+ u32 index_buffer_count = 0;
+ for (u32 index = 0; index < method_count; ++index) {
+ method = deferred_draw_method[index];
+ if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count)) {
+ instance_count = ++vertex_buffer_count;
+ } else if (method == MAXWELL3D_REG_INDEX(index_buffer.count)) {
+ instance_count = ++index_buffer_count;
+ }
+ }
+ ASSERT_MSG(!(vertex_buffer_count && index_buffer_count),
+ "Instance both indexed and direct?");
+ } else {
+ instance_count = 1;
+ for (u32 index = 0; index < method_count; ++index) {
+ method = deferred_draw_method[index];
+ if (MAXWELL3D_REG_INDEX(draw_inline_index) == method ||
+ MAXWELL3D_REG_INDEX(inline_index_2x16.even) == method ||
+ MAXWELL3D_REG_INDEX(inline_index_4x8.index0) == method) {
+ regs.index_buffer.count = static_cast<u32>(inline_index_draw_indexes.size() / 4);
+ regs.index_buffer.format = Regs::IndexFormat::UnsignedInt;
+ break;
+ }
+ }
+ }
+
+ ProcessDraw(instance_count);
+
+ deferred_draw_method.clear();
+ inline_index_draw_indexes.clear();
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 75e3b868d..910ab213a 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1739,14 +1739,11 @@ public:
Footprint_1x1_Virtual = 2,
};
- struct InlineIndex4x8Align {
+ struct InlineIndex4x8 {
union {
BitField<0, 30, u32> count;
BitField<30, 2, u32> start;
};
- };
-
- struct InlineIndex4x8Index {
union {
BitField<0, 8, u32> index0;
BitField<8, 8, u32> index1;
@@ -2836,8 +2833,7 @@ public:
u32 depth_write_enabled; ///< 0x12E8
u32 alpha_test_enabled; ///< 0x12EC
INSERT_PADDING_BYTES_NOINIT(0x10);
- InlineIndex4x8Align inline_index_4x8_align; ///< 0x1300
- InlineIndex4x8Index inline_index_4x8_index; ///< 0x1304
+ InlineIndex4x8 inline_index_4x8; ///< 0x1300
D3DCullMode d3d_cull_mode; ///< 0x1308
ComparisonOp depth_test_func; ///< 0x130C
f32 alpha_test_ref; ///< 0x1310
@@ -2974,7 +2970,7 @@ public:
CullFace gl_cull_face; ///< 0x1920
Viewport::PixelCenter viewport_pixel_center; ///< 0x1924
INSERT_PADDING_BYTES_NOINIT(0x4);
- u32 viewport_scale_offset_enbled; ///< 0x192C
+ u32 viewport_scale_offset_enabled; ///< 0x192C
INSERT_PADDING_BYTES_NOINIT(0xC);
ViewportClipControl viewport_clip_control; ///< 0x193C
UserClip::Op user_clip_op; ///< 0x1940
@@ -3048,8 +3044,6 @@ public:
};
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
-
- u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
};
State state{};
@@ -3064,11 +3058,6 @@ public:
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) override;
- /// Write the value to the register identified by method.
- void CallMethodFromMME(u32 method, u32 method_argument);
-
- void FlushMMEInlineDraw();
-
bool ShouldExecute() const {
return execute_on;
}
@@ -3081,21 +3070,6 @@ public:
return *rasterizer;
}
- enum class MMEDrawMode : u32 {
- Undefined,
- Array,
- Indexed,
- };
-
- struct MMEDrawState {
- MMEDrawMode current_mode{MMEDrawMode::Undefined};
- u32 current_count{};
- u32 instance_count{};
- bool instance_mode{};
- bool gl_begin_consume{};
- u32 gl_end_count{};
- } mme_draw;
-
struct DirtyState {
using Flags = std::bitset<std::numeric_limits<u8>::max()>;
using Table = std::array<u8, Regs::NUM_REGS>;
@@ -3105,6 +3079,8 @@ public:
Tables tables{};
} dirty;
+ std::vector<u8> inline_index_draw_indexes;
+
private:
void InitializeRegisterDefaults();
@@ -3164,14 +3140,12 @@ private:
/// Handles a write to the CB_BIND register.
void ProcessCBBind(size_t stage_index);
- /// Handles a write to the VERTEX_END_GL register, triggering a draw.
- void DrawArrays();
-
/// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro)
void ProcessTopologyOverride();
- // Handles a instance drawcall from MME
- void StepInstance(MMEDrawMode expected_mode, u32 count);
+ void ProcessDraw(u32 instance_count = 1);
+
+ void ProcessDeferredDraw();
/// Returns a query's value or an empty object if the value will be deferred through a cache.
std::optional<u64> GetQueryResult();
@@ -3184,8 +3158,6 @@ private:
/// Start offsets of each macro in macro_memory
std::array<u32, 0x80> macro_positions{};
- std::array<bool, Regs::NUM_REGS> mme_inline{};
-
/// Macro method that is currently being executed / being fed parameters.
u32 executing_macro = 0;
/// Parameters that have been submitted to the macro call so far.
@@ -3198,6 +3170,9 @@ private:
bool execute_on{true};
bool use_topology_override{false};
+
+ std::array<bool, Regs::NUM_REGS> draw_command{};
+ std::vector<u32> deferred_draw_method;
};
#define ASSERT_REG_POSITION(field_name, position) \
@@ -3402,8 +3377,7 @@ ASSERT_REG_POSITION(alpha_to_coverage_dither, 0x12E0);
ASSERT_REG_POSITION(blend_per_target_enabled, 0x12E4);
ASSERT_REG_POSITION(depth_write_enabled, 0x12E8);
ASSERT_REG_POSITION(alpha_test_enabled, 0x12EC);
-ASSERT_REG_POSITION(inline_index_4x8_align, 0x1300);
-ASSERT_REG_POSITION(inline_index_4x8_index, 0x1304);
+ASSERT_REG_POSITION(inline_index_4x8, 0x1300);
ASSERT_REG_POSITION(d3d_cull_mode, 0x1308);
ASSERT_REG_POSITION(depth_test_func, 0x130C);
ASSERT_REG_POSITION(alpha_test_ref, 0x1310);
@@ -3508,7 +3482,7 @@ ASSERT_REG_POSITION(gl_cull_test_enabled, 0x1918);
ASSERT_REG_POSITION(gl_front_face, 0x191C);
ASSERT_REG_POSITION(gl_cull_face, 0x1920);
ASSERT_REG_POSITION(viewport_pixel_center, 0x1924);
-ASSERT_REG_POSITION(viewport_scale_offset_enbled, 0x192C);
+ASSERT_REG_POSITION(viewport_scale_offset_enabled, 0x192C);
ASSERT_REG_POSITION(viewport_clip_control, 0x193C);
ASSERT_REG_POSITION(user_clip_op, 0x1940);
ASSERT_REG_POSITION(render_enable_override, 0x1944);
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index f61d5998e..505d81c1e 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -16,7 +16,10 @@
#include "video_core/macro/macro.h"
#include "video_core/macro/macro_hle.h"
#include "video_core/macro/macro_interpreter.h"
+
+#ifdef ARCHITECTURE_x86_64
#include "video_core/macro/macro_jit_x64.h"
+#endif
namespace Tegra {
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 8a8adbb42..f896591bf 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -22,35 +22,29 @@ void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.regs.draw.topology.Assign(
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0x3ffffff));
maxwell3d.regs.global_base_instance_index = parameters[5];
- maxwell3d.mme_draw.instance_count = instance_count;
maxwell3d.regs.global_base_vertex_index = parameters[3];
maxwell3d.regs.index_buffer.count = parameters[1];
maxwell3d.regs.index_buffer.first = parameters[4];
if (maxwell3d.ShouldExecute()) {
- maxwell3d.Rasterizer().Draw(true, true);
+ maxwell3d.Rasterizer().Draw(true, instance_count);
}
maxwell3d.regs.index_buffer.count = 0;
- maxwell3d.mme_draw.instance_count = 0;
- maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
- const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
+ const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
maxwell3d.regs.vertex_buffer.first = parameters[3];
maxwell3d.regs.vertex_buffer.count = parameters[1];
maxwell3d.regs.global_base_instance_index = parameters[4];
maxwell3d.regs.draw.topology.Assign(
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
- maxwell3d.mme_draw.instance_count = count;
if (maxwell3d.ShouldExecute()) {
- maxwell3d.Rasterizer().Draw(false, true);
+ maxwell3d.Rasterizer().Draw(false, instance_count);
}
maxwell3d.regs.vertex_buffer.count = 0;
- maxwell3d.mme_draw.instance_count = 0;
- maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
@@ -63,24 +57,21 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.regs.global_base_vertex_index = element_base;
maxwell3d.regs.global_base_instance_index = base_instance;
- maxwell3d.mme_draw.instance_count = instance_count;
- maxwell3d.CallMethodFromMME(0x8e3, 0x640);
- maxwell3d.CallMethodFromMME(0x8e4, element_base);
- maxwell3d.CallMethodFromMME(0x8e5, base_instance);
+ maxwell3d.CallMethod(0x8e3, 0x640, true);
+ maxwell3d.CallMethod(0x8e4, element_base, true);
+ maxwell3d.CallMethod(0x8e5, base_instance, true);
maxwell3d.regs.draw.topology.Assign(
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
if (maxwell3d.ShouldExecute()) {
- maxwell3d.Rasterizer().Draw(true, true);
+ maxwell3d.Rasterizer().Draw(true, instance_count);
}
maxwell3d.regs.vertex_id_base = 0x0;
maxwell3d.regs.index_buffer.count = 0;
maxwell3d.regs.global_base_vertex_index = 0x0;
maxwell3d.regs.global_base_instance_index = 0x0;
- maxwell3d.mme_draw.instance_count = 0;
- maxwell3d.CallMethodFromMME(0x8e3, 0x640);
- maxwell3d.CallMethodFromMME(0x8e4, 0x0);
- maxwell3d.CallMethodFromMME(0x8e5, 0x0);
- maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+ maxwell3d.CallMethod(0x8e3, 0x640, true);
+ maxwell3d.CallMethod(0x8e4, 0x0, true);
+ maxwell3d.CallMethod(0x8e5, 0x0, true);
}
// Multidraw Indirect
@@ -91,11 +82,9 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.regs.index_buffer.count = 0;
maxwell3d.regs.global_base_vertex_index = 0x0;
maxwell3d.regs.global_base_instance_index = 0x0;
- maxwell3d.mme_draw.instance_count = 0;
- maxwell3d.CallMethodFromMME(0x8e3, 0x640);
- maxwell3d.CallMethodFromMME(0x8e4, 0x0);
- maxwell3d.CallMethodFromMME(0x8e5, 0x0);
- maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+ maxwell3d.CallMethod(0x8e3, 0x640, true);
+ maxwell3d.CallMethod(0x8e4, 0x0, true);
+ maxwell3d.CallMethod(0x8e5, 0x0, true);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
});
const u32 start_indirect = parameters[0];
@@ -127,15 +116,13 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.regs.index_buffer.count = num_vertices;
maxwell3d.regs.global_base_vertex_index = base_vertex;
maxwell3d.regs.global_base_instance_index = base_instance;
- maxwell3d.mme_draw.instance_count = instance_count;
- maxwell3d.CallMethodFromMME(0x8e3, 0x640);
- maxwell3d.CallMethodFromMME(0x8e4, base_vertex);
- maxwell3d.CallMethodFromMME(0x8e5, base_instance);
+ maxwell3d.CallMethod(0x8e3, 0x640, true);
+ maxwell3d.CallMethod(0x8e4, base_vertex, true);
+ maxwell3d.CallMethod(0x8e5, base_instance, true);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
if (maxwell3d.ShouldExecute()) {
- maxwell3d.Rasterizer().Draw(true, true);
+ maxwell3d.Rasterizer().Draw(true, instance_count);
}
- maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
}
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index f670b1bca..c0d32c112 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -335,7 +335,7 @@ void MacroInterpreterImpl::SetMethodAddress(u32 address) {
}
void MacroInterpreterImpl::Send(u32 value) {
- maxwell3d.CallMethodFromMME(method_address.address, value);
+ maxwell3d.CallMethod(method_address.address, value, true);
// Increment the method address by the method increment.
method_address.address.Assign(method_address.address.Value() +
method_address.increment.Value());
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index a302a9603..25c1ce798 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -346,7 +346,7 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
}
void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
- maxwell3d->CallMethodFromMME(method_address.address, value);
+ maxwell3d->CallMethod(method_address.address, value, true);
}
void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 384350dbd..8c8dfcca6 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -45,7 +45,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
kind_valus.fill(PTEKind::INVALID);
big_kinds.resize(big_page_table_size / 32, kind_valus);
entries.resize(page_table_size / 32, 0);
- kinds.resize(big_page_table_size / 32, kind_valus);
+ kinds.resize(page_table_size / 32, kind_valus);
}
MemoryManager::~MemoryManager() = default;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index d2d40884c..1cbfef090 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -40,7 +40,7 @@ public:
virtual ~RasterizerInterface() = default;
/// Dispatches a draw invocation
- virtual void Draw(bool is_indexed, bool is_instanced) = 0;
+ virtual void Draw(bool is_indexed, u32 instance_count) = 0;
/// Clear the current framebuffer
virtual void Clear() = 0;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 08f4d69ab..6af4ae793 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -29,17 +29,17 @@ constexpr std::array PROGRAM_LUT{
[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) {
switch (gl_format) {
case GL_RGBA8_SNORM:
- return GL_RGBA8;
+ return GL_RGBA8I;
case GL_R8_SNORM:
- return GL_R8;
+ return GL_R8I;
case GL_RGBA16_SNORM:
- return GL_RGBA16;
+ return GL_RGBA16I;
case GL_R16_SNORM:
- return GL_R16;
+ return GL_R16I;
case GL_RG16_SNORM:
- return GL_RG16;
+ return GL_RG16I;
case GL_RG8_SNORM:
- return GL_RG8;
+ return GL_RG8I;
default:
return gl_format;
}
@@ -96,9 +96,6 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
texture.Create(GL_TEXTURE_BUFFER);
const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format};
const GLenum texture_format{GetTextureBufferFormat(gl_format)};
- if (texture_format != gl_format) {
- LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM.");
- }
glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size);
views.push_back({
.offset = offset,
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index 1d20a79ec..c115dabe1 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -503,6 +503,17 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
float_image_scaling_mask, down_factor, 0.0f);
}
}
+ if (info.uses_render_area) {
+ const auto render_area_width(static_cast<GLfloat>(regs.surface_clip.width));
+ const auto render_area_height(static_cast<GLfloat>(regs.surface_clip.height));
+ if (use_assembly) {
+ glProgramLocalParameter4fARB(AssemblyStage(stage), 1, render_area_width,
+ render_area_height, 0.0f, 0.0f);
+ } else {
+ glProgramUniform4f(source_programs[stage].handle, 1, render_area_width,
+ render_area_height, 0.0f, 0.0f);
+ }
+ }
}};
if constexpr (Spec::enabled_stages[0]) {
prepare_stage(0);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e5c09a969..8a8b5ce54 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -205,7 +205,7 @@ void RasterizerOpenGL::Clear() {
++num_queued_commands;
}
-void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
+void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
SCOPE_EXIT({ gpu.TickWork(); });
@@ -222,14 +222,15 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed);
+ BindInlineIndexBuffer();
+
SyncState();
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d->regs.draw.topology);
BeginTransformFeedback(pipeline, primitive_mode);
const GLuint base_instance = static_cast<GLuint>(maxwell3d->regs.global_base_instance_index);
- const GLsizei num_instances =
- static_cast<GLsizei>(is_instanced ? maxwell3d->mme_draw.instance_count : 1);
+ const GLsizei num_instances = static_cast<GLsizei>(instance_count);
if (is_indexed) {
const GLint base_vertex = static_cast<GLint>(maxwell3d->regs.global_base_vertex_index);
const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d->regs.index_buffer.count);
@@ -617,6 +618,16 @@ void RasterizerOpenGL::SyncViewport() {
}
flags[Dirty::Viewport0 + index] = false;
+ if (!regs.viewport_scale_offset_enabled) {
+ const auto x = static_cast<GLfloat>(regs.surface_clip.x);
+ const auto y = static_cast<GLfloat>(regs.surface_clip.y);
+ const auto width = static_cast<GLfloat>(regs.surface_clip.width);
+ const auto height = static_cast<GLfloat>(regs.surface_clip.height);
+ glViewportIndexedf(static_cast<GLuint>(index), x, y, width != 0.0f ? width : 1.0f,
+ height != 0.0f ? height : 1.0f);
+ continue;
+ }
+
const auto& src = regs.viewport_transform[index];
GLfloat x = conv(src.translate_x - src.scale_x);
GLfloat y = conv(src.translate_y - src.scale_y);
@@ -1129,6 +1140,16 @@ void RasterizerOpenGL::ReleaseChannel(s32 channel_id) {
query_cache.EraseChannel(channel_id);
}
+void RasterizerOpenGL::BindInlineIndexBuffer() {
+ if (maxwell3d->inline_index_draw_indexes.empty()) {
+ return;
+ }
+ const auto data_count = static_cast<u32>(maxwell3d->inline_index_draw_indexes.size());
+ auto buffer = Buffer(buffer_cache_runtime, *this, 0, data_count);
+ buffer.ImmediateUpload(0, maxwell3d->inline_index_draw_indexes);
+ buffer_cache_runtime.BindIndexBuffer(buffer, 0, data_count);
+}
+
AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {}
bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 45131b785..793e0d608 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -68,7 +68,7 @@ public:
StateTracker& state_tracker_);
~RasterizerOpenGL() override;
- void Draw(bool is_indexed, bool is_instanced) override;
+ void Draw(bool is_indexed, u32 instance_count) override;
void Clear() override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
@@ -199,6 +199,8 @@ private:
/// End a transform feedback
void EndTransformFeedback();
+ void BindInlineIndexBuffer();
+
Tegra::GPU& gpu;
const Device& device;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index e94cfdb1a..3fe04a115 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -49,7 +49,7 @@ using VideoCommon::LoadPipelines;
using VideoCommon::SerializePipeline;
using Context = ShaderContext::Context;
-constexpr u32 CACHE_VERSION = 6;
+constexpr u32 CACHE_VERSION = 7;
template <typename Container>
auto MakeSpan(Container& container) {
@@ -76,7 +76,8 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
}
break;
case Shader::Stage::TessellationEval:
- info.tess_clockwise = key.tessellation_clockwise != 0;
+ // Flip the face, as OpenGL's drawing is flipped.
+ info.tess_clockwise = key.tessellation_clockwise == 0;
info.tess_primitive = [&key] {
switch (key.tessellation_primitive) {
case Maxwell::Tessellation::DomainType::Isolines:
@@ -218,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.support_float16 = false,
.support_int64 = device.HasShaderInt64(),
.needs_demote_reorder = device.IsAmd(),
+ .support_snorm_render_buffer = false,
} {
if (use_asynchronous_shaders) {
workers = CreateWorkers();
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index a359f96f1..d53b422ca 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -70,8 +70,8 @@ void SetupDirtyViewports(Tables& tables) {
FillBlock(tables[1], OFF(viewport_transform), NUM(viewport_transform), Viewports);
FillBlock(tables[1], OFF(viewports), NUM(viewports), Viewports);
- tables[0][OFF(viewport_scale_offset_enbled)] = ViewportTransform;
- tables[1][OFF(viewport_scale_offset_enbled)] = Viewports;
+ tables[0][OFF(viewport_scale_offset_enabled)] = ViewportTransform;
+ tables[1][OFF(viewport_scale_offset_enabled)] = Viewports;
}
void SetupDirtyScissors(Tables& tables) {
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index b24f3424a..b7843e995 100644
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -68,13 +68,15 @@ public:
}
vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
+ using Shader::Backend::SPIRV::RenderAreaLayout;
using Shader::Backend::SPIRV::RescalingLayout;
const u32 size_offset = is_compute ? sizeof(RescalingLayout::down_factor) : 0u;
const VkPushConstantRange range{
.stageFlags = static_cast<VkShaderStageFlags>(
is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS),
.offset = 0,
- .size = static_cast<u32>(sizeof(RescalingLayout)) - size_offset,
+ .size = static_cast<u32>(sizeof(RescalingLayout)) - size_offset +
+ static_cast<u32>(sizeof(RenderAreaLayout)),
};
return device->GetLogical().CreatePipelineLayout({
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
@@ -167,6 +169,12 @@ private:
u32 image_bit{1u};
};
+class RenderAreaPushConstant {
+public:
+ bool uses_render_area{};
+ std::array<f32, 4> words{};
+};
+
inline void PushImageDescriptors(TextureCache& texture_cache,
UpdateDescriptorQueue& update_descriptor_queue,
const Shader::Info& info, RescalingPushConstant& rescaling,
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index cb7fa2078..89426121f 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -480,11 +480,15 @@ void BlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) {
fsr.reset();
}
- if (framebuffer.width == raw_width && framebuffer.height == raw_height && !raw_images.empty()) {
+ if (framebuffer.width == raw_width && framebuffer.height == raw_height &&
+ framebuffer.pixel_format == pixel_format && !raw_images.empty()) {
return;
}
+
raw_width = framebuffer.width;
raw_height = framebuffer.height;
+ pixel_format = framebuffer.pixel_format;
+
ReleaseRawImages();
CreateStagingBuffer(framebuffer);
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 29e2ea925..a2b73ec54 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -28,6 +28,10 @@ namespace VideoCore {
class RasterizerInterface;
}
+namespace Service::android {
+enum class PixelFormat : u32;
+}
+
namespace Vulkan {
struct ScreenInfo;
@@ -156,6 +160,7 @@ private:
u32 raw_width = 0;
u32 raw_height = 0;
+ Service::android::PixelFormat pixel_format{};
std::unique_ptr<FSR> fsr;
};
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index c3f66c8a3..1aa116cea 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -31,6 +31,7 @@ namespace {
using boost::container::small_vector;
using boost::container::static_vector;
using Shader::ImageBufferDescriptor;
+using Shader::Backend::SPIRV::RENDERAREA_LAYOUT_OFFSET;
using Shader::Backend::SPIRV::RESCALING_LAYOUT_DOWN_FACTOR_OFFSET;
using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
using Tegra::Texture::TexturePair;
@@ -433,12 +434,19 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
update_descriptor_queue.Acquire();
RescalingPushConstant rescaling;
+ RenderAreaPushConstant render_area;
const VkSampler* samplers_it{samplers.data()};
const VideoCommon::ImageViewInOut* views_it{views.data()};
const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
buffer_cache.BindHostStageBuffers(stage);
PushImageDescriptors(texture_cache, update_descriptor_queue, stage_infos[stage], rescaling,
samplers_it, views_it);
+ const auto& info{stage_infos[0]};
+ if (info.uses_render_area) {
+ render_area.uses_render_area = true;
+ render_area.words = {static_cast<float>(regs.surface_clip.width),
+ static_cast<float>(regs.surface_clip.height)};
+ }
}};
if constexpr (Spec::enabled_stages[0]) {
prepare_stage(0);
@@ -455,10 +463,11 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (Spec::enabled_stages[4]) {
prepare_stage(4);
}
- ConfigureDraw(rescaling);
+ ConfigureDraw(rescaling, render_area);
}
-void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) {
+void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling,
+ const RenderAreaPushConstant& render_area) {
texture_cache.UpdateRenderTargets(false);
scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
@@ -474,7 +483,9 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) {
const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(),
- is_rescaling, update_rescaling](vk::CommandBuffer cmdbuf) {
+ is_rescaling, update_rescaling,
+ uses_render_area = render_area.uses_render_area,
+ render_area_data = render_area.words](vk::CommandBuffer cmdbuf) {
if (bind_pipeline) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
}
@@ -488,6 +499,11 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) {
RESCALING_LAYOUT_DOWN_FACTOR_OFFSET, sizeof(scale_down_factor),
&scale_down_factor);
}
+ if (uses_render_area) {
+ cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
+ RENDERAREA_LAYOUT_OFFSET, sizeof(render_area_data),
+ &render_area_data);
+ }
if (!descriptor_set_layout) {
return;
}
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 85602592b..6bf577d25 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -62,6 +62,7 @@ class Device;
class PipelineStatistics;
class RenderPassCache;
class RescalingPushConstant;
+class RenderAreaPushConstant;
class Scheduler;
class UpdateDescriptorQueue;
@@ -119,7 +120,8 @@ private:
template <typename Spec>
void ConfigureImpl(bool is_indexed);
- void ConfigureDraw(const RescalingPushConstant& rescaling);
+ void ConfigureDraw(const RescalingPushConstant& rescaling,
+ const RenderAreaPushConstant& render_are);
void MakePipeline(VkRenderPass render_pass);
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 13d5a1f67..d4b0a542a 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -53,7 +53,7 @@ using VideoCommon::FileEnvironment;
using VideoCommon::GenericEnvironment;
using VideoCommon::GraphicsEnvironment;
-constexpr u32 CACHE_VERSION = 6;
+constexpr u32 CACHE_VERSION = 7;
template <typename Container>
auto MakeSpan(Container& container) {
@@ -166,6 +166,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
}
break;
case Shader::Stage::TessellationEval:
+ info.tess_clockwise = key.state.tessellation_clockwise != 0;
info.tess_primitive = [&key] {
const u32 raw{key.state.tessellation_primitive.Value()};
switch (static_cast<Maxwell::Tessellation::DomainType>(raw)) {
@@ -325,6 +326,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.support_int64 = device.IsShaderInt64Supported(),
.needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR ||
driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,
+ .support_snorm_render_buffer = true,
};
}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 47dfb45a1..f69c0c50f 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -127,11 +127,10 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u3
return scissor;
}
-DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
- bool is_indexed) {
+DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_indexed) {
DrawParams params{
.base_instance = regs.global_base_instance_index,
- .num_instances = is_instanced ? num_instances : 1,
+ .num_instances = num_instances,
.base_vertex = is_indexed ? regs.global_base_vertex_index : regs.vertex_buffer.first,
.num_vertices = is_indexed ? regs.index_buffer.count : regs.vertex_buffer.count,
.first_index = is_indexed ? regs.index_buffer.first : 0,
@@ -157,12 +156,10 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
update_descriptor_queue(device, scheduler),
blit_image(device, scheduler, state_tracker, descriptor_pool),
- astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue,
- memory_allocator),
render_pass_cache(device), texture_cache_runtime{device, scheduler,
memory_allocator, staging_pool,
- blit_image, astc_decoder_pass,
- render_pass_cache},
+ blit_image, render_pass_cache,
+ descriptor_pool, update_descriptor_queue},
texture_cache(texture_cache_runtime, *this),
buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
update_descriptor_queue, descriptor_pool),
@@ -177,7 +174,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
RasterizerVulkan::~RasterizerVulkan() = default;
-void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
+void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
MICROPROFILE_SCOPE(Vulkan_Drawing);
SCOPE_EXIT({ gpu.TickWork(); });
@@ -194,13 +191,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed);
+ BindInlineIndexBuffer();
+
BeginTransformFeedback();
UpdateDynamicStates();
const auto& regs{maxwell3d->regs};
- const u32 num_instances{maxwell3d->mme_draw.instance_count};
- const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
+ const u32 num_instances{instance_count};
+ const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_indexed)};
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
@@ -304,14 +303,19 @@ void RasterizerVulkan::Clear() {
}
}
- scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
- const VkClearAttachment attachment{
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .colorAttachment = color_attachment,
- .clearValue = clear_value,
- };
- cmdbuf.ClearAttachments(attachment, clear_rect);
- });
+ if (regs.clear_surface.R && regs.clear_surface.G && regs.clear_surface.B &&
+ regs.clear_surface.A) {
+ scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
+ const VkClearAttachment attachment{
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .colorAttachment = color_attachment,
+ .clearValue = clear_value,
+ };
+ cmdbuf.ClearAttachments(attachment, clear_rect);
+ });
+ } else {
+ UNIMPLEMENTED_MSG("Unimplemented Clear only the specified channel");
+ }
}
if (!use_depth && !use_stencil) {
@@ -679,6 +683,22 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
if (!state_tracker.TouchViewports()) {
return;
}
+ if (!regs.viewport_scale_offset_enabled) {
+ const auto x = static_cast<float>(regs.surface_clip.x);
+ const auto y = static_cast<float>(regs.surface_clip.y);
+ const auto width = static_cast<float>(regs.surface_clip.width);
+ const auto height = static_cast<float>(regs.surface_clip.height);
+ VkViewport viewport{
+ .x = x,
+ .y = y,
+ .width = width != 0.0f ? width : 1.0f,
+ .height = height != 0.0f ? height : 1.0f,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f,
+ };
+ scheduler.Record([viewport](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewport); });
+ return;
+ }
const bool is_rescaling{texture_cache.IsRescaling()};
const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
const std::array viewports{
@@ -1009,4 +1029,17 @@ void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
query_cache.EraseChannel(channel_id);
}
+void RasterizerVulkan::BindInlineIndexBuffer() {
+ if (maxwell3d->inline_index_draw_indexes.empty()) {
+ return;
+ }
+ const auto data_count = static_cast<u32>(maxwell3d->inline_index_draw_indexes.size());
+ auto buffer = buffer_cache_runtime.UploadStagingBuffer(data_count);
+ std::memcpy(buffer.mapped_span.data(), maxwell3d->inline_index_draw_indexes.data(), data_count);
+ buffer_cache_runtime.BindIndexBuffer(
+ maxwell3d->regs.draw.topology, maxwell3d->regs.index_buffer.format,
+ maxwell3d->regs.index_buffer.first, maxwell3d->regs.index_buffer.count, buffer.buffer,
+ static_cast<u32>(buffer.offset), data_count);
+}
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 4cde3c983..b0bc306f5 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -64,7 +64,7 @@ public:
StateTracker& state_tracker_, Scheduler& scheduler_);
~RasterizerVulkan() override;
- void Draw(bool is_indexed, bool is_instanced) override;
+ void Draw(bool is_indexed, u32 instance_count) override;
void Clear() override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
@@ -141,6 +141,8 @@ private:
void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
+ void BindInlineIndexBuffer();
+
Tegra::GPU& gpu;
ScreenInfo& screen_info;
@@ -153,7 +155,6 @@ private:
DescriptorPool descriptor_pool;
UpdateDescriptorQueue update_descriptor_queue;
BlitImageHelper blit_image;
- ASTCDecoderPass astc_decoder_pass;
RenderPassCache render_pass_cache;
TextureCacheRuntime texture_cache_runtime;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index c04aad08f..929216749 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -144,7 +144,6 @@ private:
using FuncType = TypedCommand<T>;
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
- recorded_counts++;
command_offset = Common::AlignUp(command_offset, alignof(FuncType));
if (command_offset > sizeof(data) - sizeof(FuncType)) {
return false;
@@ -166,7 +165,7 @@ private:
}
bool Empty() const {
- return recorded_counts == 0;
+ return command_offset == 0;
}
bool HasSubmit() const {
@@ -177,7 +176,6 @@ private:
Command* first = nullptr;
Command* last = nullptr;
- size_t recorded_counts = 0;
size_t command_offset = 0;
bool submit = false;
alignas(std::max_align_t) std::array<u8, 0x8000> data{};
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index b87c3be66..edb41b171 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -51,7 +51,7 @@ Flags MakeInvalidationFlags() {
void SetupDirtyViewports(Tables& tables) {
FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports);
FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports);
- tables[0][OFF(viewport_scale_offset_enbled)] = Viewports;
+ tables[0][OFF(viewport_scale_offset_enabled)] = Viewports;
tables[1][OFF(window_origin)] = Viewports;
}
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 305ad8aee..853b80d8a 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -791,12 +791,17 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
MemoryAllocator& memory_allocator_,
StagingBufferPool& staging_buffer_pool_,
BlitImageHelper& blit_image_helper_,
- ASTCDecoderPass& astc_decoder_pass_,
- RenderPassCache& render_pass_cache_)
+ RenderPassCache& render_pass_cache_,
+ DescriptorPool& descriptor_pool,
+ UpdateDescriptorQueue& update_descriptor_queue)
: device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_},
staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_},
- astc_decoder_pass{astc_decoder_pass_}, render_pass_cache{render_pass_cache_},
- resolution{Settings::values.resolution_info} {}
+ render_pass_cache{render_pass_cache_}, resolution{Settings::values.resolution_info} {
+ if (Settings::values.accelerate_astc) {
+ astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool,
+ update_descriptor_queue, memory_allocator);
+ }
+}
void TextureCacheRuntime::Finish() {
scheduler.Finish();
@@ -1782,17 +1787,17 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
const auto& resolution = runtime.resolution;
- u32 width = 0;
- u32 height = 0;
+ u32 width = std::numeric_limits<u32>::max();
+ u32 height = std::numeric_limits<u32>::max();
for (size_t index = 0; index < NUM_RT; ++index) {
const ImageView* const color_buffer = color_buffers[index];
if (!color_buffer) {
renderpass_key.color_formats[index] = PixelFormat::Invalid;
continue;
}
- width = std::max(width, is_rescaled ? resolution.ScaleUp(color_buffer->size.width)
+ width = std::min(width, is_rescaled ? resolution.ScaleUp(color_buffer->size.width)
: color_buffer->size.width);
- height = std::max(height, is_rescaled ? resolution.ScaleUp(color_buffer->size.height)
+ height = std::min(height, is_rescaled ? resolution.ScaleUp(color_buffer->size.height)
: color_buffer->size.height);
attachments.push_back(color_buffer->RenderTarget());
renderpass_key.color_formats[index] = color_buffer->format;
@@ -1804,9 +1809,9 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
}
const size_t num_colors = attachments.size();
if (depth_buffer) {
- width = std::max(width, is_rescaled ? resolution.ScaleUp(depth_buffer->size.width)
+ width = std::min(width, is_rescaled ? resolution.ScaleUp(depth_buffer->size.width)
: depth_buffer->size.width);
- height = std::max(height, is_rescaled ? resolution.ScaleUp(depth_buffer->size.height)
+ height = std::min(height, is_rescaled ? resolution.ScaleUp(depth_buffer->size.height)
: depth_buffer->size.height);
attachments.push_back(depth_buffer->RenderTarget());
renderpass_key.depth_format = depth_buffer->format;
@@ -1845,7 +1850,7 @@ void TextureCacheRuntime::AccelerateImageUpload(
Image& image, const StagingBufferRef& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) {
if (IsPixelFormatASTC(image.info.format)) {
- return astc_decoder_pass.Assemble(image, map, swizzles);
+ return astc_decoder_pass->Assemble(image, map, swizzles);
}
ASSERT(false);
}
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 0b7ac0df1..7ec0df134 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -6,6 +6,7 @@
#include <span>
#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/texture_cache_base.h"
@@ -25,14 +26,15 @@ using VideoCommon::RenderTargets;
using VideoCommon::SlotVector;
using VideoCore::Surface::PixelFormat;
-class ASTCDecoderPass;
class BlitImageHelper;
+class DescriptorPool;
class Device;
class Image;
class ImageView;
class Framebuffer;
class RenderPassCache;
class StagingBufferPool;
+class UpdateDescriptorQueue;
class Scheduler;
class TextureCacheRuntime {
@@ -41,8 +43,9 @@ public:
MemoryAllocator& memory_allocator_,
StagingBufferPool& staging_buffer_pool_,
BlitImageHelper& blit_image_helper_,
- ASTCDecoderPass& astc_decoder_pass_,
- RenderPassCache& render_pass_cache_);
+ RenderPassCache& render_pass_cache_,
+ DescriptorPool& descriptor_pool,
+ UpdateDescriptorQueue& update_descriptor_queue);
void Finish();
@@ -97,8 +100,8 @@ public:
MemoryAllocator& memory_allocator;
StagingBufferPool& staging_buffer_pool;
BlitImageHelper& blit_image_helper;
- ASTCDecoderPass& astc_decoder_pass;
RenderPassCache& render_pass_cache;
+ std::optional<ASTCDecoderPass> astc_decoder_pass;
const Settings::ResolutionScalingInfo& resolution;
constexpr static size_t indexing_slots = 8 * sizeof(size_t);
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index fbabb3219..f24f320b6 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -19,6 +19,7 @@
#include "video_core/engines/kepler_compute.h"
#include "video_core/memory_manager.h"
#include "video_core/shader_environment.h"
+#include "video_core/texture_cache/format_lookup_table.h"
#include "video_core/textures/texture.h"
namespace VideoCommon {
@@ -33,7 +34,7 @@ static u64 MakeCbufKey(u32 index, u32 offset) {
return (static_cast<u64>(index) << 32) | offset;
}
-static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
+static Shader::TextureType ConvertTextureType(const Tegra::Texture::TICEntry& entry) {
switch (entry.texture_type) {
case Tegra::Texture::TextureType::Texture1D:
return Shader::TextureType::Color1D;
@@ -59,6 +60,26 @@ static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
}
}
+static Shader::TexturePixelFormat ConvertTexturePixelFormat(const Tegra::Texture::TICEntry& entry) {
+ switch (PixelFormatFromTextureInfo(entry.format, entry.r_type, entry.g_type, entry.b_type,
+ entry.a_type, entry.srgb_conversion)) {
+ case VideoCore::Surface::PixelFormat::A8B8G8R8_SNORM:
+ return Shader::TexturePixelFormat::A8B8G8R8_SNORM;
+ case VideoCore::Surface::PixelFormat::R8_SNORM:
+ return Shader::TexturePixelFormat::R8_SNORM;
+ case VideoCore::Surface::PixelFormat::R8G8_SNORM:
+ return Shader::TexturePixelFormat::R8G8_SNORM;
+ case VideoCore::Surface::PixelFormat::R16G16B16A16_SNORM:
+ return Shader::TexturePixelFormat::R16G16B16A16_SNORM;
+ case VideoCore::Surface::PixelFormat::R16G16_SNORM:
+ return Shader::TexturePixelFormat::R16G16_SNORM;
+ case VideoCore::Surface::PixelFormat::R16_SNORM:
+ return Shader::TexturePixelFormat::R16_SNORM;
+ default:
+ return Shader::TexturePixelFormat::OTHER;
+ }
+}
+
static std::string_view StageToPrefix(Shader::Stage stage) {
switch (stage) {
case Shader::Stage::VertexB:
@@ -178,22 +199,31 @@ void GenericEnvironment::Dump(u64 hash) {
void GenericEnvironment::Serialize(std::ofstream& file) const {
const u64 code_size{static_cast<u64>(CachedSize())};
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
+ const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())};
const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
.write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
+ .write(reinterpret_cast<const char*>(&num_texture_pixel_formats),
+ sizeof(num_texture_pixel_formats))
.write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
.write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
.write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
.write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
.write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
.write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
+ .write(reinterpret_cast<const char*>(&viewport_transform_state),
+ sizeof(viewport_transform_state))
.write(reinterpret_cast<const char*>(&stage), sizeof(stage))
.write(reinterpret_cast<const char*>(code.data()), code_size);
for (const auto& [key, type] : texture_types) {
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
.write(reinterpret_cast<const char*>(&type), sizeof(type));
}
+ for (const auto& [key, format] : texture_pixel_formats) {
+ file.write(reinterpret_cast<const char*>(&key), sizeof(key))
+ .write(reinterpret_cast<const char*>(&format), sizeof(format));
+ }
for (const auto& [key, type] : cbuf_values) {
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
.write(reinterpret_cast<const char*>(&type), sizeof(type));
@@ -237,15 +267,13 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
return std::nullopt;
}
-Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit,
- bool via_header_index, u32 raw) {
+Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit,
+ bool via_header_index, u32 raw) {
const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
Tegra::Texture::TICEntry entry;
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
- const Shader::TextureType result{ConvertType(entry)};
- texture_types.emplace(raw, result);
- return result;
+ return entry;
}
GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
@@ -305,8 +333,27 @@ u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
const auto& regs{maxwell3d->regs};
const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding};
- return ReadTextureTypeImpl(regs.tex_header.Address(), regs.tex_header.limit, via_header_index,
- handle);
+ auto entry =
+ ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle);
+ const Shader::TextureType result{ConvertTextureType(entry)};
+ texture_types.emplace(handle, result);
+ return result;
+}
+
+Shader::TexturePixelFormat GraphicsEnvironment::ReadTexturePixelFormat(u32 handle) {
+ const auto& regs{maxwell3d->regs};
+ const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding};
+ auto entry =
+ ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle);
+ const Shader::TexturePixelFormat result(ConvertTexturePixelFormat(entry));
+ texture_pixel_formats.emplace(handle, result);
+ return result;
+}
+
+u32 GraphicsEnvironment::ReadViewportTransformState() {
+ const auto& regs{maxwell3d->regs};
+ viewport_transform_state = regs.viewport_scale_offset_enabled;
+ return viewport_transform_state;
}
ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
@@ -337,21 +384,41 @@ u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) {
const auto& regs{kepler_compute->regs};
const auto& qmd{kepler_compute->launch_description};
- return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
+ auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
+ const Shader::TextureType result{ConvertTextureType(entry)};
+ texture_types.emplace(handle, result);
+ return result;
+}
+
+Shader::TexturePixelFormat ComputeEnvironment::ReadTexturePixelFormat(u32 handle) {
+ const auto& regs{kepler_compute->regs};
+ const auto& qmd{kepler_compute->launch_description};
+ auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
+ const Shader::TexturePixelFormat result(ConvertTexturePixelFormat(entry));
+ texture_pixel_formats.emplace(handle, result);
+ return result;
+}
+
+u32 ComputeEnvironment::ReadViewportTransformState() {
+ return viewport_transform_state;
}
void FileEnvironment::Deserialize(std::ifstream& file) {
u64 code_size{};
u64 num_texture_types{};
+ u64 num_texture_pixel_formats{};
u64 num_cbuf_values{};
file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
.read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
+ .read(reinterpret_cast<char*>(&num_texture_pixel_formats),
+ sizeof(num_texture_pixel_formats))
.read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
.read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
.read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
.read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
.read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
.read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
+ .read(reinterpret_cast<char*>(&viewport_transform_state), sizeof(viewport_transform_state))
.read(reinterpret_cast<char*>(&stage), sizeof(stage));
code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
file.read(reinterpret_cast<char*>(code.get()), code_size);
@@ -362,6 +429,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
.read(reinterpret_cast<char*>(&type), sizeof(type));
texture_types.emplace(key, type);
}
+ for (size_t i = 0; i < num_texture_pixel_formats; ++i) {
+ u32 key;
+ Shader::TexturePixelFormat format;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key))
+ .read(reinterpret_cast<char*>(&format), sizeof(format));
+ texture_pixel_formats.emplace(key, format);
+ }
for (size_t i = 0; i < num_cbuf_values; ++i) {
u64 key;
u32 value;
@@ -409,6 +483,18 @@ Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) {
return it->second;
}
+Shader::TexturePixelFormat FileEnvironment::ReadTexturePixelFormat(u32 handle) {
+ const auto it{texture_pixel_formats.find(handle)};
+ if (it == texture_pixel_formats.end()) {
+ throw Shader::LogicError("Uncached read texture pixel format");
+ }
+ return it->second;
+}
+
+u32 FileEnvironment::ReadViewportTransformState() {
+ return viewport_transform_state;
+}
+
u32 FileEnvironment::LocalMemorySize() const {
return local_memory_size;
}
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index 8b3b8e9f5..bb55b029f 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
@@ -63,14 +63,15 @@ public:
protected:
std::optional<u64> TryFindSize();
- Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index,
- u32 raw);
+ Tegra::Texture::TICEntry ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit,
+ bool via_header_index, u32 raw);
Tegra::MemoryManager* gpu_memory{};
GPUVAddr program_base{};
std::vector<u64> code;
std::unordered_map<u32, Shader::TextureType> texture_types;
+ std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
std::unordered_map<u64, u32> cbuf_values;
u32 local_memory_size{};
@@ -85,6 +86,8 @@ protected:
u32 cached_highest = 0;
u32 initial_offset = 0;
+ u32 viewport_transform_state = 1;
+
bool has_unbound_instructions = false;
};
@@ -102,6 +105,10 @@ public:
Shader::TextureType ReadTextureType(u32 handle) override;
+ Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
+
+ u32 ReadViewportTransformState() override;
+
private:
Tegra::Engines::Maxwell3D* maxwell3d{};
size_t stage_index{};
@@ -120,6 +127,10 @@ public:
Shader::TextureType ReadTextureType(u32 handle) override;
+ Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
+
+ u32 ReadViewportTransformState() override;
+
private:
Tegra::Engines::KeplerCompute* kepler_compute{};
};
@@ -143,6 +154,10 @@ public:
[[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override;
+ [[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
+
+ [[nodiscard]] u32 ReadViewportTransformState() override;
+
[[nodiscard]] u32 LocalMemorySize() const override;
[[nodiscard]] u32 SharedMemorySize() const override;
@@ -156,6 +171,7 @@ public:
private:
std::unique_ptr<u64[]> code;
std::unordered_map<u32, Shader::TextureType> texture_types;
+ std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
std::unordered_map<u64, u32> cbuf_values;
std::array<u32, 3> workgroup_size{};
u32 local_memory_size{};
@@ -164,6 +180,7 @@ private:
u32 read_lowest{};
u32 read_highest{};
u32 initial_offset{};
+ u32 viewport_transform_state = 1;
};
void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 1223df5a0..e8c908b42 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -516,7 +516,6 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block;
- UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
UNIMPLEMENTED_IF(copy.image_offset.x != 0);
UNIMPLEMENTED_IF(copy.image_offset.y != 0);
UNIMPLEMENTED_IF(copy.image_offset.z != 0);