summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/buffer_cache/buffer_base.h14
-rw-r--r--src/video_core/gpu.cpp2
-rw-r--r--src/video_core/gpu_thread.cpp6
-rw-r--r--src/video_core/gpu_thread.h8
-rw-r--r--src/video_core/host1x/vic.cpp14
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.cpp23
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.h10
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.h2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp26
12 files changed, 65 insertions, 70 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index c47b7d866..92d77eef2 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -430,7 +430,7 @@ private:
if (query_begin >= SizeBytes() || size < 0) {
return;
}
- [[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
+ u64* const untracked_words = Array<Type::Untracked>();
u64* const state_words = Array<type>();
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
@@ -483,7 +483,7 @@ private:
NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
}
// Exclude CPU modified pages when visiting GPU pages
- const u64 word = current_word;
+ const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
u64 page = page_begin;
page_begin = 0;
@@ -531,7 +531,7 @@ private:
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked);
- [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
+ const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
@@ -539,7 +539,8 @@ private:
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
- const u64 word = state_words[word_index];
+ const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
+ const u64 word = state_words[word_index] & ~off_word;
if (word == 0) {
continue;
}
@@ -563,7 +564,7 @@ private:
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked);
- [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
+ const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
@@ -573,7 +574,8 @@ private:
u64 begin = std::numeric_limits<u64>::max();
u64 end = 0;
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
- const u64 word = state_words[word_index];
+ const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
+ const u64 word = state_words[word_index] & ~off_word;
if (word == 0) {
continue;
}
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index c6d54be63..7024a19cf 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -99,7 +99,7 @@ struct GPU::Impl {
/// Signal the ending of command list.
void OnCommandListEnd() {
- gpu_thread.OnCommandListEnd();
+ rasterizer->ReleaseFences();
}
/// Request a host GPU memory flush from the CPU.
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 164a5252a..9c103c0d4 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -40,8 +40,6 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
scheduler.Push(submit_list->channel, std::move(submit_list->entries));
} else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
- } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
- rasterizer->ReleaseFences();
} else if (std::holds_alternative<GPUTickCommand>(next.data)) {
system.GPU().TickWork();
} else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
@@ -110,10 +108,6 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
rasterizer->OnCPUWrite(addr, size);
}
-void ThreadManager::OnCommandListEnd() {
- PushCommand(OnCommandListEndCommand());
-}
-
u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
if (!is_async) {
// In synchronous GPU mode, block the caller until the command has executed
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index c71a419c7..90bcb5958 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -77,16 +77,12 @@ struct FlushAndInvalidateRegionCommand final {
u64 size;
};
-/// Command called within the gpu, to schedule actions after a command list end
-struct OnCommandListEndCommand final {};
-
/// Command to make the gpu look into pending requests
struct GPUTickCommand final {};
using CommandData =
std::variant<std::monostate, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
- InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
- GPUTickCommand>;
+ InvalidateRegionCommand, FlushAndInvalidateRegionCommand, GPUTickCommand>;
struct CommandDataContainer {
CommandDataContainer() = default;
@@ -134,8 +130,6 @@ public:
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(VAddr addr, u64 size);
- void OnCommandListEnd();
-
void TickGPU();
private:
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp
index 36a04e4e0..10d7ef884 100644
--- a/src/video_core/host1x/vic.cpp
+++ b/src/video_core/host1x/vic.cpp
@@ -189,9 +189,7 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
for (std::size_t y = 0; y < frame_height; ++y) {
const std::size_t src = y * stride;
const std::size_t dst = y * aligned_width;
- for (std::size_t x = 0; x < frame_width; ++x) {
- luma_buffer[dst + x] = luma_src[src + x];
- }
+ std::memcpy(luma_buffer.data() + dst, luma_src + src, frame_width);
}
host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
luma_buffer.size());
@@ -205,15 +203,15 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
// Frame from FFmpeg software
// Populate chroma buffer from both channels with interleaving.
const std::size_t half_width = frame_width / 2;
+ u8* chroma_buffer_data = chroma_buffer.data();
const u8* chroma_b_src = frame->data[1];
const u8* chroma_r_src = frame->data[2];
for (std::size_t y = 0; y < half_height; ++y) {
const std::size_t src = y * half_stride;
const std::size_t dst = y * aligned_width;
-
for (std::size_t x = 0; x < half_width; ++x) {
- chroma_buffer[dst + x * 2] = chroma_b_src[src + x];
- chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x];
+ chroma_buffer_data[dst + x * 2] = chroma_b_src[src + x];
+ chroma_buffer_data[dst + x * 2 + 1] = chroma_r_src[src + x];
}
}
break;
@@ -225,9 +223,7 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
for (std::size_t y = 0; y < half_height; ++y) {
const std::size_t src = y * stride;
const std::size_t dst = y * aligned_width;
- for (std::size_t x = 0; x < frame_width; ++x) {
- chroma_buffer[dst + x] = chroma_src[src + x];
- }
+ std::memcpy(chroma_buffer.data() + dst, chroma_src + src, frame_width);
}
break;
}
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index 26d066004..1a0cea9b7 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -30,7 +30,7 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep
ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
const Shader::Info& info_, std::string code,
- std::vector<u32> code_v)
+ std::vector<u32> code_v, bool force_context_flush)
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
program_manager{program_manager_}, info{info_} {
switch (device.GetShaderBackend()) {
@@ -63,6 +63,15 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac
writes_global_memory = !use_storage_buffers &&
std::ranges::any_of(info.storage_buffers_descriptors,
[](const auto& desc) { return desc.is_written; });
+ if (force_context_flush) {
+ std::scoped_lock lock{built_mutex};
+ built_fence.Create();
+ // Flush this context to ensure compilation commands and fence are in the GPU pipe.
+ glFlush();
+ built_condvar.notify_one();
+ } else {
+ is_built = true;
+ }
}
void ComputePipeline::Configure() {
@@ -142,6 +151,9 @@ void ComputePipeline::Configure() {
}
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
+ if (!is_built) {
+ WaitForBuild();
+ }
if (assembly_program.handle != 0) {
program_manager.BindComputeAssemblyProgram(assembly_program.handle);
} else {
@@ -223,4 +235,13 @@ void ComputePipeline::Configure() {
}
}
+void ComputePipeline::WaitForBuild() {
+ if (built_fence.handle == 0) {
+ std::unique_lock lock{built_mutex};
+ built_condvar.wait(lock, [this] { return built_fence.handle != 0; });
+ }
+ ASSERT(glClientWaitSync(built_fence.handle, 0, GL_TIMEOUT_IGNORED) != GL_WAIT_FAILED);
+ is_built = true;
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h
index 6534dec32..9bcc72b59 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h
@@ -50,7 +50,8 @@ class ComputePipeline {
public:
explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
- const Shader::Info& info_, std::string code, std::vector<u32> code_v);
+ const Shader::Info& info_, std::string code, std::vector<u32> code_v,
+ bool force_context_flush = false);
void Configure();
@@ -65,6 +66,8 @@ public:
}
private:
+ void WaitForBuild();
+
TextureCache& texture_cache;
BufferCache& buffer_cache;
Tegra::MemoryManager* gpu_memory;
@@ -81,6 +84,11 @@ private:
bool use_storage_buffers{};
bool writes_global_memory{};
+
+ std::mutex built_mutex;
+ std::condition_variable built_condvar;
+ OGLSync built_fence{};
+ bool is_built{false};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index c115dabe1..29491e762 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -176,7 +176,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
std::array<std::string, 5> sources,
std::array<std::vector<u32>, 5> sources_spirv,
const std::array<const Shader::Info*, 5>& infos,
- const GraphicsPipelineKey& key_)
+ const GraphicsPipelineKey& key_, bool force_context_flush)
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_},
state_tracker{state_tracker_}, key{key_} {
if (shader_notify) {
@@ -231,7 +231,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
const bool in_parallel = thread_worker != nullptr;
const auto backend = device.GetShaderBackend();
auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv),
- shader_notify, backend, in_parallel](ShaderContext::Context*) mutable {
+ shader_notify, backend, in_parallel,
+ force_context_flush](ShaderContext::Context*) mutable {
for (size_t stage = 0; stage < 5; ++stage) {
switch (backend) {
case Settings::ShaderBackend::GLSL:
@@ -251,7 +252,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
break;
}
}
- if (in_parallel) {
+ if (force_context_flush || in_parallel) {
std::scoped_lock lock{built_mutex};
built_fence.Create();
// Flush this context to ensure compilation commands and fence are in the GPU pipe.
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 1c06b3655..7bab3be0a 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -78,7 +78,7 @@ public:
std::array<std::string, 5> sources,
std::array<std::vector<u32>, 5> sources_spirv,
const std::array<const Shader::Info*, 5>& infos,
- const GraphicsPipelineKey& key_);
+ const GraphicsPipelineKey& key_, bool force_context_flush = false);
void Configure(bool is_indexed) {
configure_func(this, is_indexed);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 7dd854e0f..626ea7dcb 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -286,7 +286,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
file.read(reinterpret_cast<char*>(&key), sizeof(key));
queue_work([this, key, env = std::move(env), &state, &callback](Context* ctx) mutable {
ctx->pools.ReleaseContents();
- auto pipeline{CreateComputePipeline(ctx->pools, key, env)};
+ auto pipeline{CreateComputePipeline(ctx->pools, key, env, true)};
std::scoped_lock lock{state.mutex};
if (pipeline) {
compute_cache.emplace(key, std::move(pipeline));
@@ -307,7 +307,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
env_ptrs.push_back(&env);
}
ctx->pools.ReleaseContents();
- auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)};
+ auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false, true)};
std::scoped_lock lock{state.mutex};
if (pipeline) {
graphics_cache.emplace(key, std::move(pipeline));
@@ -439,7 +439,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline() {
std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
- std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
+ std::span<Shader::Environment* const> envs, bool use_shader_workers,
+ bool force_context_flush) try {
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
size_t env_index{};
u32 total_storage_buffers{};
@@ -531,10 +532,10 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
}
previous_program = &program;
}
- auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
+ auto* const thread_worker{use_shader_workers ? workers.get() : nullptr};
return std::make_unique<GraphicsPipeline>(device, texture_cache, buffer_cache, program_manager,
state_tracker, thread_worker, &shader_notify, sources,
- sources_spirv, infos, key);
+ sources_spirv, infos, key, force_context_flush);
} catch (Shader::Exception& exception) {
LOG_ERROR(Render_OpenGL, "{}", exception.what());
@@ -559,8 +560,8 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
}
std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
- ShaderContext::ShaderPools& pools, const ComputePipelineKey& key,
- Shader::Environment& env) try {
+ ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env,
+ bool force_context_flush) try {
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
@@ -589,7 +590,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
}
return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, program_manager,
- program.info, code, code_spirv);
+ program.info, code, code_spirv, force_context_flush);
} catch (Shader::Exception& exception) {
LOG_ERROR(Render_OpenGL, "{}", exception.what());
return nullptr;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index f82420592..6b9732fca 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -50,14 +50,16 @@ private:
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
- std::span<Shader::Environment* const> envs, bool build_in_parallel);
+ std::span<Shader::Environment* const> envs, bool use_shader_workers,
+ bool force_context_flush = false);
std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key,
const VideoCommon::ShaderInfo* shader);
std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools,
const ComputePipelineKey& key,
- Shader::Environment& env);
+ Shader::Environment& env,
+ bool force_context_flush = false);
std::unique_ptr<ShaderWorker> CreateWorkers() const;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index f91bb5a1d..baedc4424 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -548,31 +548,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
static_vector<VkVertexInputBindingDescription, 32> vertex_bindings;
static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes;
- if (key.state.dynamic_vertex_input) {
- const size_t num_vertex_arrays = std::min(
- key.state.attributes.size(), static_cast<size_t>(device.GetMaxVertexInputBindings()));
- for (size_t index = 0; index < num_vertex_arrays; ++index) {
- const u32 type = key.state.DynamicAttributeType(index);
- if (!stage_infos[0].loads.Generic(index) || type == 0) {
- continue;
- }
- vertex_attributes.push_back({
- .location = static_cast<u32>(index),
- .binding = 0,
- .format = type == 1 ? VK_FORMAT_R32_SFLOAT
- : type == 2 ? VK_FORMAT_R32_SINT
- : VK_FORMAT_R32_UINT,
- .offset = 0,
- });
- }
- if (!vertex_attributes.empty()) {
- vertex_bindings.push_back({
- .binding = 0,
- .stride = 4,
- .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
- });
- }
- } else {
+ if (!key.state.dynamic_vertex_input) {
const size_t num_vertex_arrays = std::min(
Maxwell::NumVertexArrays, static_cast<size_t>(device.GetMaxVertexInputBindings()));
for (size_t index = 0; index < num_vertex_arrays; ++index) {