summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h40
-rw-r--r--src/video_core/engines/maxwell_3d.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp29
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp10
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp3
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp3
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp143
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp37
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h3
-rw-r--r--src/video_core/shader/decode/other.cpp14
-rw-r--r--src/video_core/shader/node.h5
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp3
-rw-r--r--src/video_core/texture_cache/texture_cache.h53
19 files changed, 217 insertions, 166 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f00c71dae..d6ee82836 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -229,7 +229,7 @@ endif()
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad)
+target_link_libraries(video_core PRIVATE glad xbyak)
if (ENABLE_VULKAN)
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index d9a4a1b4d..b88fce2cd 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -56,24 +56,28 @@ public:
if (use_fast_cbuf || size < max_stream_size) {
if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
auto& memory_manager = system.GPU().MemoryManager();
+ const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
if (use_fast_cbuf) {
- if (memory_manager.IsGranularRange(gpu_addr, size)) {
- const auto host_ptr = memory_manager.GetPointer(gpu_addr);
- return ConstBufferUpload(host_ptr, size);
+ u8* dest;
+ if (is_granular) {
+ dest = memory_manager.GetPointer(gpu_addr);
} else {
staging_buffer.resize(size);
- memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
- return ConstBufferUpload(staging_buffer.data(), size);
+ dest = staging_buffer.data();
+ memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
}
+ return ConstBufferUpload(dest, size);
+ }
+ if (is_granular) {
+ u8* const host_ptr = memory_manager.GetPointer(gpu_addr);
+ return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
+ std::memcpy(dest, host_ptr, size);
+ });
} else {
- if (memory_manager.IsGranularRange(gpu_addr, size)) {
- const auto host_ptr = memory_manager.GetPointer(gpu_addr);
- return StreamBufferUpload(host_ptr, size, alignment);
- } else {
- staging_buffer.resize(size);
- memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
- return StreamBufferUpload(staging_buffer.data(), size, alignment);
- }
+ return StreamBufferUpload(
+ size, alignment, [&memory_manager, gpu_addr, size](u8* dest) {
+ memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
+ });
}
}
}
@@ -101,7 +105,9 @@ public:
BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
std::size_t alignment = 4) {
std::lock_guard lock{mutex};
- return StreamBufferUpload(raw_pointer, size, alignment);
+ return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
+ std::memcpy(dest, raw_pointer, size);
+ });
}
void Map(std::size_t max_size) {
@@ -424,11 +430,11 @@ private:
map->MarkAsModified(false, 0);
}
- BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
- std::size_t alignment) {
+ template <typename Callable>
+ BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) {
AlignBuffer(alignment);
const std::size_t uploaded_offset = buffer_offset;
- std::memcpy(buffer_ptr, raw_pointer, size);
+ callable(buffer_ptr);
buffer_ptr += size;
buffer_offset += size;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 024c9e43b..13ef2e42d 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -106,7 +106,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.rasterize_enable = 1;
regs.rt_separate_frag_data = 1;
regs.framebuffer_srgb = 1;
+ regs.line_width_aliased = 1.0f;
+ regs.line_width_smooth = 1.0f;
regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
+ regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill;
+ regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill;
shadow_state = regs;
@@ -457,8 +461,9 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
void Maxwell3D::ProcessQueryGet() {
// TODO(Subv): Support the other query units.
- ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
- "Units other than CROP are unimplemented");
+ if (regs.query.query_get.unit != Regs::QueryUnit::Crop) {
+ LOG_DEBUG(HW_GPU, "Units other than CROP are unimplemented");
+ }
switch (regs.query.query_get.operation) {
case Regs::QueryOperation::Release:
@@ -534,8 +539,8 @@ void Maxwell3D::ProcessCounterReset() {
rasterizer.ResetCounter(QueryType::SamplesPassed);
break;
default:
- LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}",
- static_cast<int>(regs.counter_reset));
+ LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}",
+ static_cast<int>(regs.counter_reset));
break;
}
}
@@ -592,8 +597,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
system.GPU().GetTicks());
return {};
default:
- UNIMPLEMENTED_MSG("Unimplemented query select type {}",
- static_cast<u32>(regs.query.query_get.select.Value()));
+ LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
+ static_cast<u32>(regs.query.query_get.select.Value()));
return 1;
}
}
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 466a911db..e1b245288 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -166,8 +166,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
const bool is_nvidia = vendor == "NVIDIA Corporation";
const bool is_amd = vendor == "ATI Technologies Inc.";
- const bool is_intel = vendor == "Intel";
- const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -182,7 +180,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
- has_broken_compute = is_intel_proprietary;
has_fast_buffer_sub_data = is_nvidia;
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
GLAD_GL_NV_compute_program5;
@@ -206,7 +203,6 @@ Device::Device(std::nullptr_t) {
has_image_load_formatted = true;
has_variable_aoffi = true;
has_component_indexing_bug = false;
- has_broken_compute = false;
has_precise_bug = false;
}
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index e915dbd86..683ed9002 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -80,10 +80,6 @@ public:
return has_precise_bug;
}
- bool HasBrokenCompute() const {
- return has_broken_compute;
- }
-
bool HasFastBufferSubData() const {
return has_fast_buffer_sub_data;
}
@@ -109,7 +105,6 @@ private:
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
- bool has_broken_compute{};
bool has_fast_buffer_sub_data{};
bool use_assembly_shaders{};
};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 716d43e65..3c421dd16 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -655,10 +655,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
- if (device.HasBrokenCompute()) {
- return;
- }
-
buffer_cache.Acquire();
current_cbuf = 0;
@@ -1024,6 +1020,26 @@ void RasterizerOpenGL::SyncViewport() {
const auto& regs = gpu.regs;
const bool dirty_viewport = flags[Dirty::Viewports];
+ const bool dirty_clip_control = flags[Dirty::ClipControl];
+
+ if (dirty_clip_control || flags[Dirty::FrontFace]) {
+ flags[Dirty::FrontFace] = false;
+
+ GLenum mode = MaxwellToGL::FrontFace(regs.front_face);
+ if (regs.screen_y_control.triangle_rast_flip != 0 &&
+ regs.viewport_transform[0].scale_y < 0.0f) {
+ switch (mode) {
+ case GL_CW:
+ mode = GL_CCW;
+ break;
+ case GL_CCW:
+ mode = GL_CW;
+ break;
+ }
+ }
+ glFrontFace(mode);
+ }
+
if (dirty_viewport || flags[Dirty::ClipControl]) {
flags[Dirty::ClipControl] = false;
@@ -1121,11 +1137,6 @@ void RasterizerOpenGL::SyncCullMode() {
glDisable(GL_CULL_FACE);
}
}
-
- if (flags[Dirty::FrontFace]) {
- flags[Dirty::FrontFace] = false;
- glFrontFace(MaxwellToGL::FrontFace(regs.front_face));
- }
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 253484968..9cb115959 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2344,7 +2344,12 @@ private:
return {};
}
- Expression MemoryBarrierGL(Operation) {
+ Expression MemoryBarrierGroup(Operation) {
+ code.AddLine("groupMemoryBarrier();");
+ return {};
+ }
+
+ Expression MemoryBarrierGlobal(Operation) {
code.AddLine("memoryBarrier();");
return {};
}
@@ -2591,7 +2596,8 @@ private:
&GLSLDecompiler::ShuffleIndexed,
&GLSLDecompiler::Barrier,
- &GLSLDecompiler::MemoryBarrierGL,
+ &GLSLDecompiler::MemoryBarrierGroup,
+ &GLSLDecompiler::MemoryBarrierGlobal,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 6b489e6db..e7952924a 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -753,6 +753,9 @@ void RendererOpenGL::RenderScreenshot() {
bool RendererOpenGL::Init() {
if (GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
+ if (Settings::values.renderer_debug) {
+ glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
+ }
glDebugMessageCallback(DebugHandler, nullptr);
}
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 568744e3c..424278816 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -71,8 +71,7 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
u32 packed_front_face = PackFrontFace(regs.front_face);
- if (regs.screen_y_control.triangle_rast_flip != 0 &&
- regs.viewport_transform[0].scale_y > 0.0f) {
+ if (regs.screen_y_control.triangle_rast_flip != 0) {
// Flip front face
packed_front_face = 1 - packed_front_face;
}
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 2871035f5..62e950d31 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -149,7 +149,7 @@ struct FormatTuple {
{VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F
{VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U
{VK_FORMAT_UNDEFINED}, // R16S
- {VK_FORMAT_UNDEFINED}, // R16UI
+ {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI
{VK_FORMAT_UNDEFINED}, // R16I
{VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16
{VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 750e5a0ca..9fd8ac3f6 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -73,76 +73,79 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType
std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) {
- static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32,
- VK_FORMAT_A8B8G8R8_UINT_PACK32,
- VK_FORMAT_A8B8G8R8_SNORM_PACK32,
- VK_FORMAT_A8B8G8R8_SRGB_PACK32,
- VK_FORMAT_B5G6R5_UNORM_PACK16,
- VK_FORMAT_A2B10G10R10_UNORM_PACK32,
- VK_FORMAT_A1R5G5B5_UNORM_PACK16,
- VK_FORMAT_R32G32B32A32_SFLOAT,
- VK_FORMAT_R32G32B32A32_UINT,
- VK_FORMAT_R32G32_SFLOAT,
- VK_FORMAT_R32G32_UINT,
- VK_FORMAT_R16G16B16A16_UINT,
- VK_FORMAT_R16G16B16A16_SNORM,
- VK_FORMAT_R16G16B16A16_UNORM,
- VK_FORMAT_R16G16_UNORM,
- VK_FORMAT_R16G16_SNORM,
- VK_FORMAT_R16G16_SFLOAT,
- VK_FORMAT_R16_UNORM,
- VK_FORMAT_R8G8B8A8_SRGB,
- VK_FORMAT_R8G8_UNORM,
- VK_FORMAT_R8G8_SNORM,
- VK_FORMAT_R8G8_UINT,
- VK_FORMAT_R8_UNORM,
- VK_FORMAT_R8_UINT,
- VK_FORMAT_B10G11R11_UFLOAT_PACK32,
- VK_FORMAT_R32_SFLOAT,
- VK_FORMAT_R32_UINT,
- VK_FORMAT_R32_SINT,
- VK_FORMAT_R16_SFLOAT,
- VK_FORMAT_R16G16B16A16_SFLOAT,
- VK_FORMAT_B8G8R8A8_UNORM,
- VK_FORMAT_B8G8R8A8_SRGB,
- VK_FORMAT_R4G4B4A4_UNORM_PACK16,
- VK_FORMAT_D32_SFLOAT,
- VK_FORMAT_D16_UNORM,
- VK_FORMAT_D16_UNORM_S8_UINT,
- VK_FORMAT_D24_UNORM_S8_UINT,
- VK_FORMAT_D32_SFLOAT_S8_UINT,
- VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
- VK_FORMAT_BC2_UNORM_BLOCK,
- VK_FORMAT_BC3_UNORM_BLOCK,
- VK_FORMAT_BC4_UNORM_BLOCK,
- VK_FORMAT_BC5_UNORM_BLOCK,
- VK_FORMAT_BC5_SNORM_BLOCK,
- VK_FORMAT_BC7_UNORM_BLOCK,
- VK_FORMAT_BC6H_UFLOAT_BLOCK,
- VK_FORMAT_BC6H_SFLOAT_BLOCK,
- VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
- VK_FORMAT_BC2_SRGB_BLOCK,
- VK_FORMAT_BC3_SRGB_BLOCK,
- VK_FORMAT_BC7_SRGB_BLOCK,
- VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
- VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
- VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
- VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
- VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
- VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
- VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
- VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
- VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
- VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
- VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
- VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
- VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
- VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
- VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
- VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
- VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
- VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
- VK_FORMAT_E5B9G9R9_UFLOAT_PACK32};
+ static constexpr std::array formats{
+ VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+ VK_FORMAT_A8B8G8R8_UINT_PACK32,
+ VK_FORMAT_A8B8G8R8_SNORM_PACK32,
+ VK_FORMAT_A8B8G8R8_SRGB_PACK32,
+ VK_FORMAT_B5G6R5_UNORM_PACK16,
+ VK_FORMAT_A2B10G10R10_UNORM_PACK32,
+ VK_FORMAT_A1R5G5B5_UNORM_PACK16,
+ VK_FORMAT_R32G32B32A32_SFLOAT,
+ VK_FORMAT_R32G32B32A32_UINT,
+ VK_FORMAT_R32G32_SFLOAT,
+ VK_FORMAT_R32G32_UINT,
+ VK_FORMAT_R16G16B16A16_UINT,
+ VK_FORMAT_R16G16B16A16_SNORM,
+ VK_FORMAT_R16G16B16A16_UNORM,
+ VK_FORMAT_R16G16_UNORM,
+ VK_FORMAT_R16G16_SNORM,
+ VK_FORMAT_R16G16_SFLOAT,
+ VK_FORMAT_R16_UNORM,
+ VK_FORMAT_R16_UINT,
+ VK_FORMAT_R8G8B8A8_SRGB,
+ VK_FORMAT_R8G8_UNORM,
+ VK_FORMAT_R8G8_SNORM,
+ VK_FORMAT_R8G8_UINT,
+ VK_FORMAT_R8_UNORM,
+ VK_FORMAT_R8_UINT,
+ VK_FORMAT_B10G11R11_UFLOAT_PACK32,
+ VK_FORMAT_R32_SFLOAT,
+ VK_FORMAT_R32_UINT,
+ VK_FORMAT_R32_SINT,
+ VK_FORMAT_R16_SFLOAT,
+ VK_FORMAT_R16G16B16A16_SFLOAT,
+ VK_FORMAT_B8G8R8A8_UNORM,
+ VK_FORMAT_B8G8R8A8_SRGB,
+ VK_FORMAT_R4G4B4A4_UNORM_PACK16,
+ VK_FORMAT_D32_SFLOAT,
+ VK_FORMAT_D16_UNORM,
+ VK_FORMAT_D16_UNORM_S8_UINT,
+ VK_FORMAT_D24_UNORM_S8_UINT,
+ VK_FORMAT_D32_SFLOAT_S8_UINT,
+ VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
+ VK_FORMAT_BC2_UNORM_BLOCK,
+ VK_FORMAT_BC3_UNORM_BLOCK,
+ VK_FORMAT_BC4_UNORM_BLOCK,
+ VK_FORMAT_BC5_UNORM_BLOCK,
+ VK_FORMAT_BC5_SNORM_BLOCK,
+ VK_FORMAT_BC7_UNORM_BLOCK,
+ VK_FORMAT_BC6H_UFLOAT_BLOCK,
+ VK_FORMAT_BC6H_SFLOAT_BLOCK,
+ VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
+ VK_FORMAT_BC2_SRGB_BLOCK,
+ VK_FORMAT_BC3_SRGB_BLOCK,
+ VK_FORMAT_BC7_SRGB_BLOCK,
+ VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
+ VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
+ VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
+ VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
+ VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
+ VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
+ VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
+ VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
+ VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
+ VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
+ VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
+ VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
+ VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
+ VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
+ VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
+ VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
+ VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
+ VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
+ VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
+ };
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
for (const auto format : formats) {
format_properties.emplace(format, physical.GetFormatProperties(format));
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a5c7b7945..65a1c6245 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -312,7 +312,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
ASSERT(point_size != 0.0f);
}
for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
- specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type();
+ const auto& attribute = fixed_state.vertex_input.attributes[i];
+ specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
+ specialization.attribute_types[i] = attribute.Type();
}
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index be5b77fae..a3d992ed3 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -877,14 +877,10 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
const auto& attrib = regs.vertex_attrib_format[index];
- if (!attrib.IsValid()) {
+ if (attrib.IsConstant()) {
vertex_input.SetAttribute(index, false, 0, 0, {}, {});
continue;
}
-
- [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer];
- ASSERT(buffer.IsEnabled());
-
vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(),
attrib.size.Value());
}
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 890f34a2c..a13e8baa7 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -741,8 +741,10 @@ private:
if (!IsGenericAttribute(index)) {
continue;
}
-
const u32 location = GetGenericAttributeLocation(index);
+ if (!IsAttributeEnabled(location)) {
+ continue;
+ }
const auto type_descriptor = GetAttributeType(location);
Id type;
if (IsInputAttributeArray()) {
@@ -986,6 +988,10 @@ private:
return stage == ShaderType::TesselationControl;
}
+ bool IsAttributeEnabled(u32 location) const {
+ return stage != ShaderType::Vertex || specialization.enabled_attributes[location];
+ }
+
u32 GetNumInputVertices() const {
switch (stage) {
case ShaderType::Geometry:
@@ -1201,16 +1207,20 @@ private:
UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
return {v_float_zero, Type::Float};
default:
- if (IsGenericAttribute(attribute)) {
- const u32 location = GetGenericAttributeLocation(attribute);
- const auto type_descriptor = GetAttributeType(location);
- const Type type = type_descriptor.type;
- const Id attribute_id = input_attributes.at(attribute);
- const std::vector elements = {element};
- const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
- return {OpLoad(GetTypeDefinition(type), pointer), type};
+ if (!IsGenericAttribute(attribute)) {
+ break;
}
- break;
+ const u32 location = GetGenericAttributeLocation(attribute);
+ if (!IsAttributeEnabled(location)) {
+ // Disabled attributes (also known as constant attributes) always return zero.
+ return {v_float_zero, Type::Float};
+ }
+ const auto type_descriptor = GetAttributeType(location);
+ const Type type = type_descriptor.type;
+ const Id attribute_id = input_attributes.at(attribute);
+ const std::vector elements = {element};
+ const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
+ return {OpLoad(GetTypeDefinition(type), pointer), type};
}
UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
return {v_float_zero, Type::Float};
@@ -2215,8 +2225,8 @@ private:
return {};
}
- Expression MemoryBarrierGL(Operation) {
- const auto scope = spv::Scope::Device;
+ template <spv::Scope scope>
+ Expression MemoryBarrier(Operation) {
const auto semantics =
spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
spv::MemorySemanticsMask::WorkgroupMemory |
@@ -2681,7 +2691,8 @@ private:
&SPIRVDecompiler::ShuffleIndexed,
&SPIRVDecompiler::Barrier,
- &SPIRVDecompiler::MemoryBarrierGL,
+ &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>,
+ &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f4c05ac3c..b7af26388 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -88,7 +88,8 @@ struct Specialization final {
u32 shared_memory_size{};
// Graphics specific
- std::optional<float> point_size{};
+ std::optional<float> point_size;
+ std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
bool ndc_minus_one_to_one{};
};
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 045f5abb5..c0a8f233f 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -299,9 +299,19 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::MEMBAR: {
- UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
- bb.push_back(Operation(OperationCode::MemoryBarrierGL));
+ const OperationCode type = [instr] {
+ switch (instr.membar.type) {
+ case Tegra::Shader::MembarType::CTA:
+ return OperationCode::MemoryBarrierGroup;
+ case Tegra::Shader::MembarType::GL:
+ return OperationCode::MemoryBarrierGlobal;
+ default:
+ UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value()));
+ return OperationCode::MemoryBarrierGlobal;
+ }
+ }();
+ bb.push_back(Operation(type));
break;
}
case OpCode::Id::DEPBAR: {
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index c06512413..c5e5165ff 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -233,8 +233,9 @@ enum class OperationCode {
ThreadLtMask, /// () -> uint
ShuffleIndexed, /// (uint value, uint index) -> uint
- Barrier, /// () -> void
- MemoryBarrierGL, /// () -> void
+ Barrier, /// () -> void
+ MemoryBarrierGroup, /// () -> void
+ MemoryBarrierGlobal, /// () -> void
Amount,
};
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 7032e0059..f476f03b0 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
ComponentType alpha_component;
bool is_srgb;
};
-constexpr std::array<Table, 77> DefinitionTable = {{
+constexpr std::array<Table, 78> DefinitionTable = {{
{TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
{TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
{TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -98,6 +98,7 @@ constexpr std::array<Table, 77> DefinitionTable = {{
{TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
{TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
{TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
+ {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
{TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8},
{TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 8bfc541d4..45e3ddd2c 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -14,6 +14,7 @@
#include <unordered_map>
#include <vector>
+#include <boost/container/small_vector.hpp>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>
@@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
template <typename TSurface, typename TView>
class TextureCache {
+ using VectorSurface = boost::container::small_vector<TSurface, 1>;
public:
void InvalidateRegion(VAddr addr, std::size_t size) {
@@ -308,18 +310,20 @@ public:
dst_surface.first->MarkAsModified(true, Tick());
}
- TSurface TryFindFramebufferSurface(VAddr addr) {
+ TSurface TryFindFramebufferSurface(VAddr addr) const {
if (!addr) {
return nullptr;
}
const VAddr page = addr >> registry_page_bits;
- std::vector<TSurface>& list = registry[page];
- for (auto& surface : list) {
- if (surface->GetCpuAddr() == addr) {
- return surface;
- }
+ const auto it = registry.find(page);
+ if (it == registry.end()) {
+ return nullptr;
}
- return nullptr;
+ const auto& list = it->second;
+ const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
+ return surface->GetCpuAddr() == addr;
+ });
+ return found != list.end() ? *found : nullptr;
}
u64 Tick() {
@@ -498,7 +502,7 @@ private:
* @param untopological Indicates to the recycler that the texture has no way
* to match the overlaps due to topological reasons.
**/
- RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
+ RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
if (Settings::IsGPULevelExtreme()) {
return RecycleStrategy::Flush;
@@ -538,9 +542,8 @@ private:
* @param untopological Indicates to the recycler that the texture has no way to match the
* overlaps due to topological reasons.
**/
- std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
- const SurfaceParams& params, const GPUVAddr gpu_addr,
- const bool preserve_contents,
+ std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
+ const GPUVAddr gpu_addr, const bool preserve_contents,
const MatchTopologyResult untopological) {
const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
for (auto& surface : overlaps) {
@@ -650,7 +653,7 @@ private:
* @param params The parameters on the new surface.
* @param gpu_addr The starting address of the new surface.
**/
- std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
+ std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
const SurfaceParams& params,
const GPUVAddr gpu_addr) {
if (params.target == SurfaceTarget::Texture3D) {
@@ -708,7 +711,7 @@ private:
* @param preserve_contents Indicates that the new surface should be loaded from memory or
* left blank.
*/
- std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
+ std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
const SurfaceParams& params,
const GPUVAddr gpu_addr,
const VAddr cpu_addr,
@@ -810,7 +813,7 @@ private:
TSurface& current_surface = iter->second;
const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
- std::vector<TSurface> overlaps{current_surface};
+ VectorSurface overlaps{current_surface};
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
topological_result);
}
@@ -1126,23 +1129,25 @@ private:
}
}
- std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
+ VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
if (size == 0) {
return {};
}
const VAddr cpu_addr_end = cpu_addr + size;
- VAddr start = cpu_addr >> registry_page_bits;
const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
- std::vector<TSurface> surfaces;
- while (start <= end) {
- std::vector<TSurface>& list = registry[start];
- for (auto& surface : list) {
- if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {
- surface->MarkAsPicked(true);
- surfaces.push_back(surface);
+ VectorSurface surfaces;
+ for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
+ const auto it = registry.find(start);
+ if (it == registry.end()) {
+ continue;
+ }
+ for (auto& surface : it->second) {
+ if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
+ continue;
}
+ surface->MarkAsPicked(true);
+ surfaces.push_back(surface);
}
- start++;
}
for (auto& surface : surfaces) {
surface->MarkAsPicked(false);