summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/buffer_cache
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r-- src/video_core/buffer_cache/buffer_base.h 14
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h 89
2 files changed, 89 insertions, 14 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index 92d77eef2..c47b7d866 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -430,7 +430,7 @@ private:
if (query_begin >= SizeBytes() || size < 0) {
return;
}
- u64* const untracked_words = Array<Type::Untracked>();
+ [[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
u64* const state_words = Array<type>();
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
@@ -483,7 +483,7 @@ private:
NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
}
// Exclude CPU modified pages when visiting GPU pages
- const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
+ const u64 word = current_word;
u64 page = page_begin;
page_begin = 0;
@@ -531,7 +531,7 @@ private:
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked);
- const u64* const untracked_words = Array<Type::Untracked>();
+ [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
@@ -539,8 +539,7 @@ private:
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
- const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
- const u64 word = state_words[word_index] & ~off_word;
+ const u64 word = state_words[word_index];
if (word == 0) {
continue;
}
@@ -564,7 +563,7 @@ private:
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked);
- const u64* const untracked_words = Array<Type::Untracked>();
+ [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
@@ -574,8 +573,7 @@ private:
u64 begin = std::numeric_limits<u64>::max();
u64 end = 0;
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
- const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
- const u64 word = state_words[word_index] & ~off_word;
+ const u64 word = state_words[word_index];
if (word == 0) {
continue;
}
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f1c60d1f3..627917ab6 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -200,7 +200,16 @@ public:
/// Return true when a CPU region is modified from the CPU
[[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
- std::mutex mutex;
+ void SetDrawIndirect(
+ const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { // Records the indirect draw parameters consulted by the later update/bind steps
+ current_draw_indirect = current_draw_indirect_; // Only the pointer is stored (no copy); a null value makes the `if (current_draw_indirect)` checks skip indirect handling
+ }
+
+ [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount();
+
+ [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer();
+
+ std::recursive_mutex mutex;
Runtime& runtime;
private:
@@ -272,6 +281,8 @@ private:
void BindHostVertexBuffers();
+ void BindHostDrawIndirectBuffers();
+
void BindHostGraphicsUniformBuffers(size_t stage);
void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
@@ -298,6 +309,8 @@ private:
void UpdateVertexBuffer(u32 index);
+ void UpdateDrawIndirect();
+
void UpdateUniformBuffers(size_t stage);
void UpdateStorageBuffers(size_t stage);
@@ -372,6 +385,8 @@ private:
SlotVector<Buffer> slot_buffers;
DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
+ const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
+
u32 last_index_count = 0;
Binding index_buffer;
@@ -380,6 +395,8 @@ private:
std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
+ Binding count_buffer_binding;
+ Binding indirect_buffer_binding;
std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
@@ -674,6 +691,9 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
}
BindHostVertexBuffers();
BindHostTransformFeedbackBuffers();
+ if (current_draw_indirect) {
+ BindHostDrawIndirectBuffers();
+ }
}
template <class P>
@@ -823,6 +843,7 @@ bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
template <class P>
void BufferCache<P>::CommitAsyncFlushesHigh() {
AccumulateFlushes();
+
if (committed_ranges.empty()) {
return;
}
@@ -869,7 +890,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
buffer_id,
});
// Align up to avoid cache conflicts
- constexpr u64 align = 256ULL;
+ constexpr u64 align = 8ULL;
constexpr u64 mask = ~(align - 1ULL);
total_size_bytes += (new_size + align - 1) & mask;
largest_copy = std::max(largest_copy, new_size);
@@ -1042,6 +1063,19 @@ void BufferCache<P>::BindHostVertexBuffers() {
}
template <class P>
+void BufferCache<P>::BindHostDrawIndirectBuffers() { // Touches and synchronizes the buffers bound for the current indirect draw
+ const auto bind_buffer = [this](const Binding& binding) { // Shared steps applied to a single binding
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer, binding.buffer_id);
+ SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); // Synchronizes the binding's cpu_addr/size range
+ };
+ if (current_draw_indirect->include_count) { // Dereferenced without a null check: BindHostGeometryBuffers only calls this when the pointer is set
+ bind_buffer(count_buffer_binding); // Count buffer is handled only when the draw includes a count
+ }
+ bind_buffer(indirect_buffer_binding); // The indirect parameter buffer is handled unconditionally
+}
+
+template <class P>
void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
u32 dirty = ~0U;
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
@@ -1272,6 +1306,9 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
UpdateStorageBuffers(stage);
UpdateTextureBuffers(stage);
}
+ if (current_draw_indirect) {
+ UpdateDrawIndirect();
+ }
} while (has_deleted_buffers);
}
@@ -1289,7 +1326,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const auto& index_array = draw_state.index_buffer;
auto& flags = maxwell3d->dirty.flags;
- if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
+ if (!flags[Dirty::IndexBuffer]) {
return;
}
flags[Dirty::IndexBuffer] = false;
@@ -1362,6 +1399,27 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
}
template <class P>
+void BufferCache<P>::UpdateDrawIndirect() { // Rebuilds the count/indirect bindings from the current indirect draw parameters
+ const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) { // Resolves one GPU range and writes the result into `binding`
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr) { // No CPU address was returned for this GPU address: use the null binding
+ binding = NULL_BINDING;
+ return;
+ }
+ binding = Binding{
+ .cpu_addr = *cpu_addr,
+ .size = static_cast<u32>(size), // size_t is narrowed to u32 here
+ .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)),
+ };
+ };
+ if (current_draw_indirect->include_count) { // When false, count_buffer_binding keeps whatever value it had before
+ update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding); // Count binding spans sizeof(u32) bytes
+ }
+ update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size,
+ indirect_buffer_binding); // Indirect binding spans buffer_size bytes from indirect_start_address
+}
+
+template <class P>
void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
Binding& binding = uniform_buffers[stage][index];
@@ -1880,14 +1938,21 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
bool is_written) const {
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
- const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
+ const u32 alignment = runtime.GetStorageBufferAlignment();
+
+ const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
+ const u32 aligned_size =
+ Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment);
+
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
if (!cpu_addr || size == 0) {
return NULL_BINDING;
}
- const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
+
+ const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE);
const Binding binding{
.cpu_addr = *cpu_addr,
- .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
+ .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr),
.buffer_id = BufferId{},
};
return binding;
@@ -1941,4 +2006,16 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index)
}
}
+template <class P>
+std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { // Returns the buffer referenced by count_buffer_binding plus buffer.Offset() of its CPU address
+ auto& buffer = slot_buffers[count_buffer_binding.buffer_id]; // No validity check on the binding is performed here
+ return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr)); // Pointer refers to the slot_buffers entry; Offset() is presumably a byte offset into the buffer - TODO confirm
+}
+
+template <class P>
+std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { // Returns the buffer referenced by indirect_buffer_binding plus buffer.Offset() of its CPU address
+ auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id]; // No validity check on the binding is performed here
+ return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr)); // Pointer refers to the slot_buffers entry; Offset() is presumably a byte offset into the buffer - TODO confirm
+}
+
} // namespace VideoCommon