diff options
author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2019-11-28 06:15:34 +0100 |
---|---|---|
committer | ReinUsesLisp <reinuseslisp@airmail.cc> | 2020-02-14 21:38:27 +0100 |
commit | 73d2d3342dc8867d32f08f89b2ca36ff071598dc (patch) | |
tree | 3a032d4a36d0f07981eeb8b396472670bfd11e5a /src/video_core/renderer_opengl/gl_query_cache.cpp | |
parent | gl_query_cache: Implement host queries using a deferred cache (diff) | |
download | yuzu-73d2d3342dc8867d32f08f89b2ca36ff071598dc.tar yuzu-73d2d3342dc8867d32f08f89b2ca36ff071598dc.tar.gz yuzu-73d2d3342dc8867d32f08f89b2ca36ff071598dc.tar.bz2 yuzu-73d2d3342dc8867d32f08f89b2ca36ff071598dc.tar.lz yuzu-73d2d3342dc8867d32f08f89b2ca36ff071598dc.tar.xz yuzu-73d2d3342dc8867d32f08f89b2ca36ff071598dc.tar.zst yuzu-73d2d3342dc8867d32f08f89b2ca36ff071598dc.zip |
Diffstat (limited to '')
-rw-r--r-- | src/video_core/renderer_opengl/gl_query_cache.cpp | 214 |
1 files changed, 161 insertions, 53 deletions
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index 8f0e8241d..74cb73209 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -2,8 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <cstring> #include <memory> +#include <unordered_map> #include <utility> #include <vector> @@ -22,6 +24,13 @@ using VideoCore::QueryType; namespace { +constexpr std::uintptr_t PAGE_SIZE = 4096; +constexpr int PAGE_SHIFT = 12; + +constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp +constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp +constexpr std::ptrdiff_t TIMESTAMP_OFFSET = 8; + constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED}; constexpr GLenum GetTarget(QueryType type) { @@ -37,23 +46,19 @@ CounterStream::~CounterStream() = default; void CounterStream::Update(bool enabled, bool any_command_queued) { if (enabled) { - if (!current) { - current = cache.GetHostCounter(last, type); - } - return; - } - - if (current) { - EndQuery(any_command_queued); + Enable(); + } else { + Disable(any_command_queued); } - last = std::exchange(current, nullptr); } void CounterStream::Reset(bool any_command_queued) { if (current) { EndQuery(any_command_queued); + + // Immediately start a new query to avoid disabling its state. + current = cache.GetHostCounter(nullptr, type); } - current = nullptr; last = nullptr; } @@ -67,6 +72,20 @@ std::shared_ptr<HostCounter> CounterStream::GetCurrent(bool any_command_queued) return last; } +void CounterStream::Enable() { + if (current) { + return; + } + current = cache.GetHostCounter(last, type); +} + +void CounterStream::Disable(bool any_command_queued) { + if (current) { + EndQuery(any_command_queued); + } + last = std::exchange(current, nullptr); +} + void CounterStream::EndQuery(bool any_command_queued) { if (!any_command_queued) { // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not @@ -78,26 +97,57 @@ void CounterStream::EndQuery(bool any_command_queued) { } QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer) - : RasterizerCache{rasterizer}, system{system}, - rasterizer{rasterizer}, streams{{CounterStream{*this, QueryType::SamplesPassed}}} {} + : system{system}, rasterizer{rasterizer}, streams{{CounterStream{*this, + QueryType::SamplesPassed}}} {} QueryCache::~QueryCache() = default; -void QueryCache::Query(GPUVAddr gpu_addr, QueryType type) { +void QueryCache::InvalidateRegion(CacheAddr addr, std::size_t size) { + const u64 addr_begin = static_cast<u64>(addr); + const u64 addr_end = addr_begin + static_cast<u64>(size); + const auto in_range = [addr_begin, addr_end](CachedQuery& query) { + const u64 cache_begin = query.GetCacheAddr(); + const u64 cache_end = cache_begin + query.GetSizeInBytes(); + return cache_begin < addr_end && addr_begin < cache_end; + }; + + const u64 page_end = addr_end >> PAGE_SHIFT; + for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { + const auto& it = cached_queries.find(page); + if (it == std::end(cached_queries)) { + continue; + } + auto& contents = it->second; + for (auto& query : contents) { + if (!in_range(query)) { + continue; + } + rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.GetSizeInBytes(), -1); + Flush(query); + } + contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), + std::end(contents)); + } +} + +void QueryCache::FlushRegion(CacheAddr addr, std::size_t size) { + // We can handle flushes in the same way as invalidations. + InvalidateRegion(addr, size); +} + +void QueryCache::Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) { auto& memory_manager = system.GPU().MemoryManager(); const auto host_ptr = memory_manager.GetPointer(gpu_addr); - auto query = TryGet(host_ptr); + CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); if (!query) { const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); ASSERT_OR_EXECUTE(cpu_addr, return;); - query = std::make_shared<CachedQuery>(type, *cpu_addr, host_ptr); - Register(query); + query = &Register(CachedQuery(type, *cpu_addr, host_ptr)); } - query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued())); - query->MarkAsModified(true, *this); + query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()), timestamp); } void QueryCache::UpdateCounters() { @@ -117,34 +167,54 @@ void QueryCache::Reserve(QueryType type, OGLQuery&& query) { std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency, QueryType type) { - const auto type_index = static_cast<std::size_t>(type); - auto& reserve = reserved_queries[type_index]; - + auto& reserve = reserved_queries[static_cast<std::size_t>(type)]; + OGLQuery query; if (reserve.empty()) { - return std::make_shared<HostCounter>(*this, std::move(dependency), type); + query.Create(GetTarget(type)); + } else { + query = std::move(reserve.back()); + reserve.pop_back(); } - auto counter = std::make_shared<HostCounter>(*this, std::move(dependency), type, - std::move(reserve.back())); - reserve.pop_back(); - return counter; + return std::make_shared<HostCounter>(*this, std::move(dependency), type, std::move(query)); +} + +CachedQuery& QueryCache::Register(CachedQuery&& cached_query) { + const u64 page = static_cast<u64>(cached_query.GetCacheAddr()) >> PAGE_SHIFT; + auto& stored_ref = cached_queries[page].emplace_back(std::move(cached_query)); + rasterizer.UpdatePagesCachedCount(stored_ref.GetCpuAddr(), stored_ref.GetSizeInBytes(), 1); + return stored_ref; +} + +CachedQuery* QueryCache::TryGet(CacheAddr addr) { + const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; + const auto it = cached_queries.find(page); + if (it == std::end(cached_queries)) { + return nullptr; + } + auto& contents = it->second; + const auto found = + std::find_if(std::begin(contents), std::end(contents), + [addr](const auto& query) { return query.GetCacheAddr() == addr; }); + return found != std::end(contents) ? &*found : nullptr; } -void QueryCache::FlushObjectInner(const std::shared_ptr<CachedQuery>& counter_) { - auto& counter = *counter_; - auto& stream = GetStream(counter.GetType()); +void QueryCache::Flush(CachedQuery& cached_query) { + auto& stream = GetStream(cached_query.GetType()); // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. // To avoid this disable and re-enable keeping the dependency stream. - const bool is_enabled = stream.IsEnabled(); - if (is_enabled) { - stream.Update(false, false); + // But we only have to do this if we have pending waits to be done. + const bool slice_counter = stream.IsEnabled() && cached_query.WaitPending(); + const bool any_command_queued = rasterizer.AnyCommandQueued(); + if (slice_counter) { + stream.Update(false, any_command_queued); } - counter.Flush(); + cached_query.Flush(); - if (is_enabled) { - stream.Update(true, false); + if (slice_counter) { + stream.Update(true, any_command_queued); } } @@ -152,13 +222,6 @@ CounterStream& QueryCache::GetStream(QueryType type) { return streams[static_cast<std::size_t>(type)]; } -HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type) - : cache{cache}, type{type}, dependency{std::move(dependency)} { - const GLenum target = GetTarget(type); - query.Create(target); - glBeginQuery(target, query.handle); -} - HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type, OGLQuery&& query_) : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} { @@ -170,35 +233,80 @@ HostCounter::~HostCounter() { } u64 HostCounter::Query() { - if (query.handle == 0) { - return result; + if (result) { + return *result; } - glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &result); - + u64 value; + glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value); if (dependency) { - result += dependency->Query(); + value += dependency->Query(); } - return result; + return *(result = value); +} + +bool HostCounter::WaitPending() const noexcept { + return result.has_value(); } CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {} + : type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {} + +CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept + : type{rhs.type}, cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, + counter{std::move(rhs.counter)}, timestamp{rhs.timestamp} {} CachedQuery::~CachedQuery() = default; +CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { + type = rhs.type; + cpu_addr = rhs.cpu_addr; + host_ptr = rhs.host_ptr; + counter = std::move(rhs.counter); + timestamp = rhs.timestamp; + return *this; +} + void CachedQuery::Flush() { - const u64 value = counter->Query(); - std::memcpy(host_ptr, &value, sizeof(value)); + // When counter is nullptr it means that it's just been reseted. We are supposed to write a zero + // in these cases. + const u64 value = counter ? counter->Query() : 0; + std::memcpy(host_ptr, &value, sizeof(u64)); + + if (timestamp) { + std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); + } } -void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_) { +void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) { + if (counter) { + // If there's an old counter set it means the query is being rewritten by the game. + // To avoid losing the data forever, flush here. + Flush(); + } counter = std::move(counter_); + timestamp = timestamp_; +} + +bool CachedQuery::WaitPending() const noexcept { + return counter && counter->WaitPending(); } -QueryType CachedQuery::GetType() const { +QueryType CachedQuery::GetType() const noexcept { return type; } +VAddr CachedQuery::GetCpuAddr() const noexcept { + return cpu_addr; +} + +CacheAddr CachedQuery::GetCacheAddr() const noexcept { + return ToCacheAddr(host_ptr); +} + +u64 CachedQuery::GetSizeInBytes() const noexcept { + return timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; +} + } // namespace OpenGL |