summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.cpp4
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_64.cpp4
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.h2
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp4
-rw-r--r--src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp3
-rw-r--r--src/shader_recompiler/ir_opt/rescaling_pass.cpp29
-rw-r--r--src/video_core/engines/maxwell_3d.cpp42
-rw-r--r--src/video_core/engines/maxwell_3d.h35
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp3
-rw-r--r--src/yuzu/configuration/config.cpp2
11 files changed, 117 insertions, 14 deletions
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 286976623..c1c843b8f 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -148,8 +148,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
config.wall_clock_cntpct = uses_wall_clock;
// Code cache size
- config.code_cache_size = 512_MiB;
- config.far_code_offset = 400_MiB;
+ config.code_cache_size = 128_MiB;
+ config.far_code_offset = 100_MiB;
// Safe optimizations
if (Settings::values.cpu_debug_mode) {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index d96226c41..aa74fce4d 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -208,8 +208,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
config.wall_clock_cntpct = uses_wall_clock;
// Code cache size
- config.code_cache_size = 512_MiB;
- config.far_code_offset = 400_MiB;
+ config.code_cache_size = 128_MiB;
+ config.far_code_offset = 100_MiB;
// Safe optimizations
if (Settings::values.cpu_debug_mode) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index b412957c7..2b360e073 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -22,7 +22,7 @@ constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS =
struct RescalingLayout {
alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures;
alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images;
- alignas(16) u32 down_factor;
+ u32 down_factor;
};
constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures);
constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor);
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 248ad3ced..b22725584 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -212,11 +212,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
}
Optimization::SsaRewritePass(program);
+ Optimization::ConstantPropagationPass(program);
+
Optimization::GlobalMemoryToStorageBufferPass(program);
Optimization::TexturePass(env, program);
- Optimization::ConstantPropagationPass(program);
-
if (Settings::values.resolution_info.active) {
Optimization::RescalingPass(program);
}
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 38592afd0..ddf497e32 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -334,7 +334,8 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
/// Tries to track the storage buffer address used by a global memory instruction
std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
- if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
+ if (inst->GetOpcode() != IR::Opcode::GetCbufU32 &&
+ inst->GetOpcode() != IR::Opcode::GetCbufU32x2) {
return std::nullopt;
}
const IR::Value index{inst->Arg(0)};
diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp
index c28500dd1..496d4667e 100644
--- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp
+++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp
@@ -183,6 +183,31 @@ void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_s
}
}
+void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
+ size_t index) {
+ const IR::Value composite{inst.Arg(index)};
+ if (composite.IsEmpty()) {
+ return;
+ }
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})};
+ const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})};
+ switch (info.type) {
+ case TextureType::ColorArray2D:
+ case TextureType::Color2D:
+ inst.SetArg(index, ir.CompositeConstruct(x, y));
+ break;
+ case TextureType::Color1D:
+ case TextureType::ColorArray1D:
+ case TextureType::Color3D:
+ case TextureType::ColorCube:
+ case TextureType::ColorArrayCube:
+ case TextureType::Buffer:
+ // Nothing to patch here
+ break;
+ }
+}
+
void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const IR::Value coord{inst.Arg(1)};
@@ -220,7 +245,7 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
SubScaleCoord(ir, inst, is_scaled);
// Scale ImageFetch offset
- ScaleIntegerComposite(ir, inst, is_scaled, 2);
+ ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
}
void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
@@ -242,7 +267,7 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
ScaleIntegerComposite(ir, inst, is_scaled, 1);
// Scale ImageFetch offset
- ScaleIntegerComposite(ir, inst, is_scaled, 2);
+ ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
}
void PatchImageRead(IR::Block& block, IR::Inst& inst) {
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 5d6d217bb..8f2fd28c2 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -7,6 +7,7 @@
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
+#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
@@ -208,6 +209,14 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return ProcessCBBind(4);
case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
return DrawArrays();
+ case MAXWELL3D_REG_INDEX(small_index):
+ regs.index_array.count = regs.small_index.count;
+ regs.index_array.first = regs.small_index.first;
+ dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ return DrawArrays();
+ case MAXWELL3D_REG_INDEX(topology_override):
+ use_topology_override = true;
+ return;
case MAXWELL3D_REG_INDEX(clear_buffers):
return ProcessClearBuffers();
case MAXWELL3D_REG_INDEX(query.query_get):
@@ -360,6 +369,35 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
}
}
+void Maxwell3D::ProcessTopologyOverride() {
+ using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology;
+ using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride;
+
+ PrimitiveTopology topology{};
+
+ switch (regs.topology_override) {
+ case PrimitiveTopologyOverride::None:
+ topology = regs.draw.topology;
+ break;
+ case PrimitiveTopologyOverride::Points:
+ topology = PrimitiveTopology::Points;
+ break;
+ case PrimitiveTopologyOverride::Lines:
+ topology = PrimitiveTopology::Lines;
+ break;
+ case PrimitiveTopologyOverride::LineStrip:
+ topology = PrimitiveTopology::LineStrip;
+ break;
+ default:
+ topology = static_cast<PrimitiveTopology>(regs.topology_override);
+ break;
+ }
+
+ if (use_topology_override) {
+ regs.draw.topology.Assign(topology);
+ }
+}
+
void Maxwell3D::FlushMMEInlineDraw() {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
regs.vertex_buffer.count);
@@ -370,6 +408,8 @@ void Maxwell3D::FlushMMEInlineDraw() {
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
+ ProcessTopologyOverride();
+
const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
if (ShouldExecute()) {
rasterizer->Draw(is_indexed, true);
@@ -529,6 +569,8 @@ void Maxwell3D::DrawArrays() {
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
+ ProcessTopologyOverride();
+
if (regs.draw.instance_next) {
// Increment the current instance *before* drawing.
state.current_instance += 1;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index dc9df6c8b..6d34da046 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -367,6 +367,22 @@ public:
Patches = 0xe,
};
+ // Constants as from NVC0_3D_UNK1970_D3D
+ // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h#L1598
+ enum class PrimitiveTopologyOverride : u32 {
+ None = 0x0,
+ Points = 0x1,
+ Lines = 0x2,
+ LineStrip = 0x3,
+ Triangles = 0x4,
+ TriangleStrip = 0x5,
+ LinesAdjacency = 0xa,
+ LineStripAdjacency = 0xb,
+ TrianglesAdjacency = 0xc,
+ TriangleStripAdjacency = 0xd,
+ Patches = 0xe,
+ };
+
enum class IndexFormat : u32 {
UnsignedByte = 0x0,
UnsignedShort = 0x1,
@@ -1200,7 +1216,12 @@ public:
}
} index_array;
- INSERT_PADDING_WORDS_NOINIT(0x7);
+ union {
+ BitField<0, 16, u32> first;
+ BitField<16, 16, u32> count;
+ } small_index;
+
+ INSERT_PADDING_WORDS_NOINIT(0x6);
INSERT_PADDING_WORDS_NOINIT(0x1F);
@@ -1244,7 +1265,11 @@ public:
BitField<11, 1, u32> depth_clamp_disabled;
} view_volume_clip_control;
- INSERT_PADDING_WORDS_NOINIT(0x1F);
+ INSERT_PADDING_WORDS_NOINIT(0xC);
+
+ PrimitiveTopologyOverride topology_override;
+
+ INSERT_PADDING_WORDS_NOINIT(0x12);
u32 depth_bounds_enable;
@@ -1531,6 +1556,9 @@ private:
/// Handles a write to the VERTEX_END_GL register, triggering a draw.
void DrawArrays();
+ /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro)
+ void ProcessTopologyOverride();
+
// Handles a instance drawcall from MME
void StepInstance(MMEDrawMode expected_mode, u32 count);
@@ -1569,6 +1597,7 @@ private:
Upload::State upload_state;
bool execute_on{true};
+ bool use_topology_override{false};
};
#define ASSERT_REG_POSITION(field_name, position) \
@@ -1685,6 +1714,7 @@ ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1);
ASSERT_REG_POSITION(index_array, 0x5F2);
+ASSERT_REG_POSITION(small_index, 0x5F9);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(vp_point_size, 0x644);
@@ -1694,6 +1724,7 @@ ASSERT_REG_POSITION(cull_face, 0x648);
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
+ASSERT_REG_POSITION(topology_override, 0x65C);
ASSERT_REG_POSITION(depth_bounds_enable, 0x66F);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 3e96c0f60..4d73427b4 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <array>
#include <cstring>
#include <memory>
#include <optional>
@@ -292,7 +293,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
};
- const std::array push_constants{base_vertex, index_shift};
+ const std::array<u32, 2> push_constants{base_vertex, index_shift};
const VkDescriptorSet set = descriptor_allocator.Commit();
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 0f62779de..ca6019a3a 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1067,7 +1067,8 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
}
break;
case PixelFormat::A8B8G8R8_UNORM:
- if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
+ if (src_view.format == PixelFormat::S8_UINT_D24_UNORM ||
+ src_view.format == PixelFormat::D24_UNORM_S8_UINT) {
return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view);
}
break;
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index c2b66ff14..4b943c6ba 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -1155,6 +1155,8 @@ void Config::SaveCpuValues() {
WriteBasicSetting(Settings::values.cpuopt_misc_ir);
WriteBasicSetting(Settings::values.cpuopt_reduce_misalign_checks);
WriteBasicSetting(Settings::values.cpuopt_fastmem);
+ WriteBasicSetting(Settings::values.cpuopt_fastmem_exclusives);
+ WriteBasicSetting(Settings::values.cpuopt_recompile_exclusives);
}
qt_config->endGroup();