summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: ReinUsesLisp <reinuseslisp@airmail.cc> 2019-11-13 03:39:45 +0100
committer: ReinUsesLisp <reinuseslisp@airmail.cc> 2019-11-23 01:28:47 +0100
commit: dbeb52387979c7e28c0acb03dfc1468146947104 (patch)
tree: 8c8e681dcc11a137517839dd64d839541cb6f9ce
parent: gl_shader_cache: Specialize shader workgroup (diff)
download: yuzu-dbeb52387979c7e28c0acb03dfc1468146947104.tar
yuzu-dbeb52387979c7e28c0acb03dfc1468146947104.tar.gz
yuzu-dbeb52387979c7e28c0acb03dfc1468146947104.tar.bz2
yuzu-dbeb52387979c7e28c0acb03dfc1468146947104.tar.lz
yuzu-dbeb52387979c7e28c0acb03dfc1468146947104.tar.xz
yuzu-dbeb52387979c7e28c0acb03dfc1468146947104.tar.zst
yuzu-dbeb52387979c7e28c0acb03dfc1468146947104.zip
Diffstat (limited to '')
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 2
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 7
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 23
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp 4
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.h 18
5 files changed, 25 insertions, 29 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index bd4e5f6e3..ebfe52e6d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -731,7 +731,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
- launch_desc.block_dim_z);
+ launch_desc.block_dim_z, launch_desc.shared_alloc);
std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant);
state.draw.program_pipeline = 0;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index a5789b6d3..982c4e23a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -8,7 +8,9 @@
#include <thread>
#include <unordered_set>
#include <boost/functional/hash.hpp>
+#include "common/alignment.h"
#include "common/assert.h"
+#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
@@ -322,6 +324,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
source +=
fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n",
variant.block_x, variant.block_y, variant.block_z);
+
+ if (variant.shared_memory_size > 0) {
+ source += fmt::format("shared uint smem[{}];",
+ Common::AlignUp(variant.shared_memory_size, 4) / 4);
+ }
}
source += '\n';
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 51c80bf32..fb2ba0905 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -223,7 +223,7 @@ private:
Type type{};
};
-constexpr const char* GetTypeString(Type type) {
+const char* GetTypeString(Type type) {
switch (type) {
case Type::Bool:
return "bool";
@@ -243,7 +243,7 @@ constexpr const char* GetTypeString(Type type) {
}
}
-constexpr const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
+const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
switch (image_type) {
case Tegra::Shader::ImageType::Texture1D:
return "1D";
@@ -522,13 +522,6 @@ private:
code.AddNewLine();
}
- void DeclareSharedMemory() {
- if (stage != ProgramType::Compute) {
- return;
- }
- code.AddLine("shared uint {}[];", GetSharedMemory());
- }
-
void DeclareInternalFlags() {
for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
const auto flag_code = static_cast<InternalFlag>(flag);
@@ -867,9 +860,7 @@ private:
}
if (const auto smem = std::get_if<SmemNode>(&*node)) {
- return {
- fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
- Type::Uint};
+ return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
}
if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
@@ -1245,9 +1236,7 @@ private:
Type::Uint};
} else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
ASSERT(stage == ProgramType::Compute);
- target = {
- fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
- Type::Uint};
+ target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
const std::string real = Visit(gmem->GetRealAddress()).AsUint();
const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
@@ -2170,10 +2159,6 @@ private:
return "lmem_" + suffix;
}
- std::string GetSharedMemory() const {
- return fmt::format("smem_{}", suffix);
- }
-
std::string GetInternalFlag(InternalFlag flag) const {
constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
"overflow_flag"};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 9156f180a..d2bb8502a 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -52,11 +52,11 @@ struct BindlessSamplerKey {
Tegra::Engines::SamplerDescriptor sampler{};
};
-constexpr u32 NativeVersion = 7;
+constexpr u32 NativeVersion = 8;
// Making sure sizes doesn't change by accident
static_assert(sizeof(BaseBindings) == 16);
-static_assert(sizeof(ProgramVariant) == 28);
+static_assert(sizeof(ProgramVariant) == 32);
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 4c7ca004d..6f8e51364 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -64,9 +64,10 @@ struct ProgramVariant final {
: base_bindings{base_bindings}, primitive_mode{primitive_mode} {}
/// Compute constructor.
- explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z) noexcept
- : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)} {
- }
+ explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z,
+ u32 shared_memory_size) noexcept
+ : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
+ shared_memory_size{shared_memory_size} {}
// Graphics specific parameters.
BaseBindings base_bindings{};
@@ -76,11 +77,13 @@ struct ProgramVariant final {
u32 block_x{};
u16 block_y{};
u16 block_z{};
+ u32 shared_memory_size{};
bool operator==(const ProgramVariant& rhs) const noexcept {
- return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z) ==
- std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y,
- rhs.block_z);
+ return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z,
+ shared_memory_size) == std::tie(rhs.base_bindings, rhs.primitive_mode,
+ rhs.block_x, rhs.block_y, rhs.block_z,
+ rhs.shared_memory_size);
}
bool operator!=(const ProgramVariant& rhs) const noexcept {
@@ -129,7 +132,8 @@ struct hash<OpenGL::ProgramVariant> {
(static_cast<std::size_t>(variant.primitive_mode) << 6) ^
static_cast<std::size_t>(variant.block_x) ^
(static_cast<std::size_t>(variant.block_y) << 32) ^
- (static_cast<std::size_t>(variant.block_z) << 48);
+ (static_cast<std::size_t>(variant.block_z) << 48) ^
+ (static_cast<std::size_t>(variant.shared_memory_size) << 16);
}
};