summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/CMakeLists.txt1
-rw-r--r--src/audio_core/CMakeLists.txt3
-rw-r--r--src/audio_core/audio_renderer.cpp30
-rw-r--r--src/audio_core/command_generator.cpp86
-rw-r--r--src/audio_core/command_generator.h23
-rw-r--r--src/audio_core/sink_context.cpp15
-rw-r--r--src/audio_core/sink_context.h2
-rw-r--r--src/common/fs/file.cpp29
-rw-r--r--src/common/fs/file.h11
-rw-r--r--src/common/logging/backend.cpp19
-rw-r--r--src/core/CMakeLists.txt2
-rw-r--r--src/core/hle/service/audio/audren_u.cpp2
-rw-r--r--src/input_common/CMakeLists.txt3
-rw-r--r--src/video_core/memory_manager.cpp109
-rw-r--r--src/video_core/memory_manager.h22
-rw-r--r--src/video_core/rasterizer_interface.h3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h1
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h1
-rw-r--r--src/video_core/texture_cache/image_base.cpp3
-rw-r--r--src/video_core/texture_cache/image_base.h39
-rw-r--r--src/video_core/texture_cache/texture_cache.h419
-rw-r--r--src/video_core/texture_cache/types.h1
-rw-r--r--src/video_core/texture_cache/util.cpp24
-rw-r--r--src/video_core/texture_cache/util.h4
27 files changed, 714 insertions, 156 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 65a4922ea..f8ec8fea8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -82,6 +82,7 @@ else()
-Werror=missing-declarations
-Werror=missing-field-initializers
-Werror=reorder
+ -Werror=sign-compare
-Werror=switch
-Werror=uninitialized
-Werror=unused-function
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index d25a1a645..090dd19b1 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -51,9 +51,6 @@ if (NOT MSVC)
target_compile_options(audio_core PRIVATE
-Werror=conversion
-Werror=ignored-qualifiers
- -Werror=implicit-fallthrough
- -Werror=reorder
- -Werror=sign-compare
-Werror=shadow
-Werror=unused-parameter
-Werror=unused-variable
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index ccd5ca6cc..7dba739b4 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -29,10 +29,9 @@ namespace {
(static_cast<float>(r_channel) * r_mix_amount)));
}
-[[nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2(s16 fl_channel, s16 fr_channel,
- s16 fc_channel,
- [[maybe_unused]] s16 lf_channel,
- s16 bl_channel, s16 br_channel) {
+[[maybe_unused, nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2(
+ s16 fl_channel, s16 fr_channel, s16 fc_channel, [[maybe_unused]] s16 lf_channel, s16 bl_channel,
+ s16 br_channel) {
// Front channels are mixed 36.94%, Center channels are mixed to be 26.12% & the back channels
// are mixed to be 36.94%
@@ -57,11 +56,11 @@ namespace {
const std::array<float_le, 4>& coeff) {
const auto left =
static_cast<float>(fl_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] +
- static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[0];
+ static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[3];
const auto right =
static_cast<float>(fr_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] +
- static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[0];
+ static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[3];
return {ClampToS16(static_cast<s32>(left)), ClampToS16(static_cast<s32>(right))};
}
@@ -241,7 +240,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
const auto channel_count = buffer_offsets.size();
const auto& final_mix = mix_context.GetFinalMixInfo();
const auto& in_params = final_mix.GetInParams();
- std::vector<s32*> mix_buffers(channel_count);
+ std::vector<std::span<s32>> mix_buffers(channel_count);
for (std::size_t i = 0; i < channel_count; i++) {
mix_buffers[i] =
command_generator.GetMixBuffer(in_params.buffer_offset + buffer_offsets[i]);
@@ -294,18 +293,11 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
buffer[i * stream_channel_count + 0] = Mix2To1(fl_sample, fr_sample);
} else if (stream_channel_count == 2) {
// Mix all channels into 2 channels
- if (sink_context.HasDownMixingCoefficients()) {
- const auto [left, right] = Mix6To2WithCoefficients(
- fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample,
- sink_context.GetDownmixCoefficients());
- buffer[i * stream_channel_count + 0] = left;
- buffer[i * stream_channel_count + 1] = right;
- } else {
- const auto [left, right] = Mix6To2(fl_sample, fr_sample, fc_sample,
- lf_sample, bl_sample, br_sample);
- buffer[i * stream_channel_count + 0] = left;
- buffer[i * stream_channel_count + 1] = right;
- }
+ const auto [left, right] = Mix6To2WithCoefficients(
+ fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample,
+ sink_context.GetDownmixCoefficients());
+ buffer[i * stream_channel_count + 0] = left;
+ buffer[i * stream_channel_count + 1] = right;
} else if (stream_channel_count == 6) {
// Pass through
buffer[i * stream_channel_count + 0] = fl_sample;
diff --git a/src/audio_core/command_generator.cpp b/src/audio_core/command_generator.cpp
index b3250be09..b99d0fc91 100644
--- a/src/audio_core/command_generator.cpp
+++ b/src/audio_core/command_generator.cpp
@@ -31,7 +31,7 @@ constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_GAIN{
0.72867f, 0.69794f, 0.5464f, 0.24563f, 0.45214f, 0.44042f};
template <std::size_t N>
-void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) {
+void ApplyMix(std::span<s32> output, std::span<const s32> input, s32 gain, s32 sample_count) {
for (std::size_t i = 0; i < static_cast<std::size_t>(sample_count); i += N) {
for (std::size_t j = 0; j < N; j++) {
output[i + j] +=
@@ -40,7 +40,8 @@ void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) {
}
}
-s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sample_count) {
+s32 ApplyMixRamp(std::span<s32> output, std::span<const s32> input, float gain, float delta,
+ s32 sample_count) {
s32 x = 0;
for (s32 i = 0; i < sample_count; i++) {
x = static_cast<s32>(static_cast<float>(input[i]) * gain);
@@ -50,20 +51,22 @@ s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sam
return x;
}
-void ApplyGain(s32* output, const s32* input, s32 gain, s32 delta, s32 sample_count) {
+void ApplyGain(std::span<s32> output, std::span<const s32> input, s32 gain, s32 delta,
+ s32 sample_count) {
for (s32 i = 0; i < sample_count; i++) {
output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15);
gain += delta;
}
}
-void ApplyGainWithoutDelta(s32* output, const s32* input, s32 gain, s32 sample_count) {
+void ApplyGainWithoutDelta(std::span<s32> output, std::span<const s32> input, s32 gain,
+ s32 sample_count) {
for (s32 i = 0; i < sample_count; i++) {
output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15);
}
}
-s32 ApplyMixDepop(s32* output, s32 first_sample, s32 delta, s32 sample_count) {
+s32 ApplyMixDepop(std::span<s32> output, s32 first_sample, s32 delta, s32 sample_count) {
const bool positive = first_sample > 0;
auto final_sample = std::abs(first_sample);
for (s32 i = 0; i < sample_count; i++) {
@@ -128,10 +131,10 @@ constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_6CH{4, 0, 0, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
template <std::size_t CHANNEL_COUNT>
-void ApplyReverbGeneric(I3dl2ReverbState& state,
- const std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT>& input,
- const std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT>& output,
- s32 sample_count) {
+void ApplyReverbGeneric(
+ I3dl2ReverbState& state,
+ const std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT>& input,
+ const std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT>& output, s32 sample_count) {
auto GetTapLookup = []() {
if constexpr (CHANNEL_COUNT == 1) {
@@ -457,8 +460,8 @@ void CommandGenerator::GenerateBiquadFilterCommand([[maybe_unused]] s32 mix_buff
"input_mix_buffer={}, output_mix_buffer={}",
node_id, input_offset, output_offset);
}
- const auto* input = GetMixBuffer(input_offset);
- auto* output = GetMixBuffer(output_offset);
+ std::span<const s32> input = GetMixBuffer(input_offset);
+ std::span<s32> output = GetMixBuffer(output_offset);
// Biquad filter parameters
const auto [n0, n1, n2] = params.numerator;
@@ -551,8 +554,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E
return;
}
- std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT> input{};
- std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT> output{};
+ std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT> input{};
+ std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT> output{};
const auto status = params.status;
for (s32 i = 0; i < channel_count; i++) {
@@ -587,7 +590,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E
for (s32 i = 0; i < channel_count; i++) {
// Only copy if the buffer input and output do not match!
if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) {
- std::memcpy(output[i], input[i], worker_params.sample_count * sizeof(s32));
+ std::memcpy(output[i].data(), input[i].data(),
+ worker_params.sample_count * sizeof(s32));
}
}
}
@@ -603,8 +607,8 @@ void CommandGenerator::GenerateBiquadFilterEffectCommand(s32 mix_buffer_offset,
for (s32 i = 0; i < channel_count; i++) {
// TODO(ogniK): Actually implement biquad filter
if (params.input[i] != params.output[i]) {
- const auto* input = GetMixBuffer(mix_buffer_offset + params.input[i]);
- auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]);
+ std::span<const s32> input = GetMixBuffer(mix_buffer_offset + params.input[i]);
+ std::span<s32> output = GetMixBuffer(mix_buffer_offset + params.output[i]);
ApplyMix<1>(output, input, 32768, worker_params.sample_count);
}
}
@@ -643,14 +647,15 @@ void CommandGenerator::GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* inf
if (samples_read != static_cast<int>(worker_params.sample_count) &&
samples_read <= params.sample_count) {
- std::memset(GetMixBuffer(output_index), 0, params.sample_count - samples_read);
+ std::memset(GetMixBuffer(output_index).data(), 0,
+ params.sample_count - samples_read);
}
} else {
AuxInfoDSP empty{};
memory.WriteBlock(aux->GetSendInfo(), &empty, sizeof(AuxInfoDSP));
memory.WriteBlock(aux->GetRecvInfo(), &empty, sizeof(AuxInfoDSP));
if (output_index != input_index) {
- std::memcpy(GetMixBuffer(output_index), GetMixBuffer(input_index),
+ std::memcpy(GetMixBuffer(output_index).data(), GetMixBuffer(input_index).data(),
worker_params.sample_count * sizeof(s32));
}
}
@@ -668,7 +673,7 @@ ServerSplitterDestinationData* CommandGenerator::GetDestinationData(s32 splitter
}
s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples,
- const s32* data, u32 sample_count, u32 write_offset,
+ std::span<const s32> data, u32 sample_count, u32 write_offset,
u32 write_count) {
if (max_samples == 0) {
return 0;
@@ -678,14 +683,14 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3
return 0;
}
- std::size_t data_offset{};
+ s32 data_offset{};
u32 remaining = sample_count;
while (remaining > 0) {
// Get position in buffer
const auto base = send_buffer + (offset * sizeof(u32));
const auto samples_to_grab = std::min(max_samples - offset, remaining);
// Write to output
- memory.WriteBlock(base, (data + data_offset), samples_to_grab * sizeof(u32));
+ memory.WriteBlock(base, (data.data() + data_offset), samples_to_grab * sizeof(u32));
offset = (offset + samples_to_grab) % max_samples;
remaining -= samples_to_grab;
data_offset += samples_to_grab;
@@ -698,7 +703,7 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3
}
s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples,
- s32* out_data, u32 sample_count, u32 read_offset,
+ std::span<s32> out_data, u32 sample_count, u32 read_offset,
u32 read_count) {
if (max_samples == 0) {
return 0;
@@ -710,15 +715,16 @@ s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u3
}
u32 remaining = sample_count;
+ s32 data_offset{};
while (remaining > 0) {
const auto base = recv_buffer + (offset * sizeof(u32));
const auto samples_to_grab = std::min(max_samples - offset, remaining);
std::vector<s32> buffer(samples_to_grab);
memory.ReadBlock(base, buffer.data(), buffer.size() * sizeof(u32));
- std::memcpy(out_data, buffer.data(), buffer.size() * sizeof(u32));
- out_data += samples_to_grab;
+ std::memcpy(out_data.data() + data_offset, buffer.data(), buffer.size() * sizeof(u32));
offset = (offset + samples_to_grab) % max_samples;
remaining -= samples_to_grab;
+ data_offset += samples_to_grab;
}
if (read_count != 0) {
@@ -965,8 +971,8 @@ void CommandGenerator::GenerateMixCommand(std::size_t output_offset, std::size_t
node_id, input_offset, output_offset, volume);
}
- auto* output = GetMixBuffer(output_offset);
- const auto* input = GetMixBuffer(input_offset);
+ std::span<s32> output = GetMixBuffer(output_offset);
+ std::span<const s32> input = GetMixBuffer(input_offset);
const s32 gain = static_cast<s32>(volume * 32768.0f);
// Mix with loop unrolling
@@ -1172,12 +1178,14 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s
return samples_processed;
}
-s32* CommandGenerator::GetMixBuffer(std::size_t index) {
- return mix_buffer.data() + (index * worker_params.sample_count);
+std::span<s32> CommandGenerator::GetMixBuffer(std::size_t index) {
+ return std::span<s32>(mix_buffer.data() + (index * worker_params.sample_count),
+ worker_params.sample_count);
}
-const s32* CommandGenerator::GetMixBuffer(std::size_t index) const {
- return mix_buffer.data() + (index * worker_params.sample_count);
+std::span<const s32> CommandGenerator::GetMixBuffer(std::size_t index) const {
+ return std::span<const s32>(mix_buffer.data() + (index * worker_params.sample_count),
+ worker_params.sample_count);
}
std::size_t CommandGenerator::GetMixChannelBufferOffset(s32 channel) const {
@@ -1188,15 +1196,15 @@ std::size_t CommandGenerator::GetTotalMixBufferCount() const {
return worker_params.mix_buffer_count + AudioCommon::MAX_CHANNEL_COUNT;
}
-s32* CommandGenerator::GetChannelMixBuffer(s32 channel) {
+std::span<s32> CommandGenerator::GetChannelMixBuffer(s32 channel) {
return GetMixBuffer(worker_params.mix_buffer_count + channel);
}
-const s32* CommandGenerator::GetChannelMixBuffer(s32 channel) const {
+std::span<const s32> CommandGenerator::GetChannelMixBuffer(s32 channel) const {
return GetMixBuffer(worker_params.mix_buffer_count + channel);
}
-void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output,
+void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output,
VoiceState& dsp_state, s32 channel,
s32 target_sample_rate, s32 sample_count,
s32 node_id) {
@@ -1208,7 +1216,7 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
node_id, channel, in_params.sample_format, sample_count, in_params.sample_rate,
in_params.mix_id, in_params.splitter_info_id);
}
- ASSERT_OR_EXECUTE(output != nullptr, { return; });
+ ASSERT_OR_EXECUTE(output.data() != nullptr, { return; });
const auto resample_rate = static_cast<s32>(
static_cast<float>(in_params.sample_rate) / static_cast<float>(target_sample_rate) *
@@ -1225,6 +1233,7 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
}
std::size_t temp_mix_offset{};
+ s32 samples_output{};
auto samples_remaining = sample_count;
while (samples_remaining > 0) {
const auto samples_to_output = std::min(samples_remaining, min_required_samples);
@@ -1328,20 +1337,21 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
if (in_params.behavior_flags.is_pitch_and_src_skipped.Value()) {
// No need to resample
- std::memcpy(output, sample_buffer.data(), samples_read * sizeof(s32));
+ std::memcpy(output.data() + samples_output, sample_buffer.data(),
+ samples_read * sizeof(s32));
} else {
std::fill(sample_buffer.begin() + temp_mix_offset,
sample_buffer.begin() + temp_mix_offset + (samples_to_read - samples_read),
0);
- AudioCore::Resample(output, sample_buffer.data(), resample_rate, dsp_state.fraction,
- samples_to_output);
+ AudioCore::Resample(output.data() + samples_output, sample_buffer.data(), resample_rate,
+ dsp_state.fraction, samples_to_output);
// Resample
for (std::size_t i = 0; i < AudioCommon::MAX_SAMPLE_HISTORY; i++) {
dsp_state.sample_history[i] = sample_buffer[samples_to_read + i];
}
}
- output += samples_to_output;
samples_remaining -= samples_to_output;
+ samples_output += samples_to_output;
}
}
diff --git a/src/audio_core/command_generator.h b/src/audio_core/command_generator.h
index f310d7317..59a33ba76 100644
--- a/src/audio_core/command_generator.h
+++ b/src/audio_core/command_generator.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <span>
#include "audio_core/common.h"
#include "audio_core/voice_context.h"
#include "common/common_types.h"
@@ -41,10 +42,10 @@ public:
void PreCommand();
void PostCommand();
- [[nodiscard]] s32* GetChannelMixBuffer(s32 channel);
- [[nodiscard]] const s32* GetChannelMixBuffer(s32 channel) const;
- [[nodiscard]] s32* GetMixBuffer(std::size_t index);
- [[nodiscard]] const s32* GetMixBuffer(std::size_t index) const;
+ [[nodiscard]] std::span<s32> GetChannelMixBuffer(s32 channel);
+ [[nodiscard]] std::span<const s32> GetChannelMixBuffer(s32 channel) const;
+ [[nodiscard]] std::span<s32> GetMixBuffer(std::size_t index);
+ [[nodiscard]] std::span<const s32> GetMixBuffer(std::size_t index) const;
[[nodiscard]] std::size_t GetMixChannelBufferOffset(s32 channel) const;
[[nodiscard]] std::size_t GetTotalMixBufferCount() const;
@@ -77,10 +78,11 @@ private:
void GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* info, bool enabled);
[[nodiscard]] ServerSplitterDestinationData* GetDestinationData(s32 splitter_id, s32 index);
- s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, const s32* data,
- u32 sample_count, u32 write_offset, u32 write_count);
- s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data,
- u32 sample_count, u32 read_offset, u32 read_count);
+ s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples,
+ std::span<const s32> data, u32 sample_count, u32 write_offset,
+ u32 write_count);
+ s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples,
+ std::span<s32> out_data, u32 sample_count, u32 read_offset, u32 read_count);
void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
std::vector<u8>& work_buffer);
@@ -91,8 +93,9 @@ private:
s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset);
s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset,
s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset);
- void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output, VoiceState& dsp_state,
- s32 channel, s32 target_sample_rate, s32 sample_count, s32 node_id);
+ void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output,
+ VoiceState& dsp_state, s32 channel, s32 target_sample_rate,
+ s32 sample_count, s32 node_id);
AudioCommon::AudioRendererParameter& worker_params;
VoiceContext& voice_context;
diff --git a/src/audio_core/sink_context.cpp b/src/audio_core/sink_context.cpp
index a69543696..cc55b290c 100644
--- a/src/audio_core/sink_context.cpp
+++ b/src/audio_core/sink_context.cpp
@@ -15,10 +15,17 @@ std::size_t SinkContext::GetCount() const {
void SinkContext::UpdateMainSink(const SinkInfo::InParams& in) {
ASSERT(in.type == SinkTypes::Device);
- has_downmix_coefs = in.device.down_matrix_enabled;
- if (has_downmix_coefs) {
+ if (in.device.down_matrix_enabled) {
downmix_coefficients = in.device.down_matrix_coef;
+ } else {
+ downmix_coefficients = {
+ 1.0f, // front
+ 0.707f, // center
+ 0.0f, // lfe
+ 0.707f, // back
+ };
}
+
in_use = in.in_use;
use_count = in.device.input_count;
buffers = in.device.input;
@@ -34,10 +41,6 @@ std::vector<u8> SinkContext::OutputBuffers() const {
return buffer_ret;
}
-bool SinkContext::HasDownMixingCoefficients() const {
- return has_downmix_coefs;
-}
-
const DownmixCoefficients& SinkContext::GetDownmixCoefficients() const {
return downmix_coefficients;
}
diff --git a/src/audio_core/sink_context.h b/src/audio_core/sink_context.h
index 9e2b69785..254961fe2 100644
--- a/src/audio_core/sink_context.h
+++ b/src/audio_core/sink_context.h
@@ -84,7 +84,6 @@ public:
[[nodiscard]] bool InUse() const;
[[nodiscard]] std::vector<u8> OutputBuffers() const;
- [[nodiscard]] bool HasDownMixingCoefficients() const;
[[nodiscard]] const DownmixCoefficients& GetDownmixCoefficients() const;
private:
@@ -92,7 +91,6 @@ private:
s32 use_count{};
std::array<u8, AudioCommon::MAX_CHANNEL_COUNT> buffers{};
std::size_t sink_count{};
- bool has_downmix_coefs{false};
DownmixCoefficients downmix_coefficients{};
};
} // namespace AudioCore
diff --git a/src/common/fs/file.cpp b/src/common/fs/file.cpp
index 077f34995..274f57659 100644
--- a/src/common/fs/file.cpp
+++ b/src/common/fs/file.cpp
@@ -306,9 +306,9 @@ bool IOFile::Flush() const {
errno = 0;
#ifdef _WIN32
- const auto flush_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0;
+ const auto flush_result = std::fflush(file) == 0;
#else
- const auto flush_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0;
+ const auto flush_result = std::fflush(file) == 0;
#endif
if (!flush_result) {
@@ -320,6 +320,28 @@ bool IOFile::Flush() const {
return flush_result;
}
+bool IOFile::Commit() const {
+ if (!IsOpen()) {
+ return false;
+ }
+
+ errno = 0;
+
+#ifdef _WIN32
+ const auto commit_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0;
+#else
+ const auto commit_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0;
+#endif
+
+ if (!commit_result) {
+ const auto ec = std::error_code{errno, std::generic_category()};
+ LOG_ERROR(Common_Filesystem, "Failed to commit the file at path={}, ec_message={}",
+ PathToUTF8String(file_path), ec.message());
+ }
+
+ return commit_result;
+}
+
bool IOFile::SetSize(u64 size) const {
if (!IsOpen()) {
return false;
@@ -347,6 +369,9 @@ u64 IOFile::GetSize() const {
return 0;
}
+ // Flush any unwritten buffered data into the file prior to retrieving the file size.
+ std::fflush(file);
+
std::error_code ec;
const auto file_size = fs::file_size(file_path, ec);
diff --git a/src/common/fs/file.h b/src/common/fs/file.h
index 588fe619d..2c4ab4332 100644
--- a/src/common/fs/file.h
+++ b/src/common/fs/file.h
@@ -396,13 +396,22 @@ public:
[[nodiscard]] size_t WriteString(std::span<const char> string) const;
/**
- * Attempts to flush any unwritten buffered data into the file and flush the file into the disk.
+ * Attempts to flush any unwritten buffered data into the file.
*
* @returns True if the flush was successful, false otherwise.
*/
bool Flush() const;
/**
+ * Attempts to commit the file into the disk.
+ * Note that this is an expensive operation as this forces the operating system to write
+ * the contents of the file associated with the file descriptor into the disk.
+ *
+ * @returns True if the commit was successful, false otherwise.
+ */
+ bool Commit() const;
+
+ /**
* Resizes the file to a given size.
* If the file is resized to a smaller size, the remainder of the file is discarded.
* If the file is resized to a larger size, the new area appears as if zero-filled.
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index b6fa4affb..61dddab3f 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -171,19 +171,22 @@ FileBackend::FileBackend(const std::filesystem::path& filename) {
FileBackend::~FileBackend() = default;
void FileBackend::Write(const Entry& entry) {
+ if (!file->IsOpen()) {
+ return;
+ }
+
using namespace Common::Literals;
- // prevent logs from going over the maximum size (in case its spamming and the user doesn't
- // know)
+ // Prevent logs from exceeding a set maximum size in the event that log entries are spammed.
constexpr std::size_t MAX_BYTES_WRITTEN = 100_MiB;
constexpr std::size_t MAX_BYTES_WRITTEN_EXTENDED = 1_GiB;
- if (!file->IsOpen()) {
- return;
- }
+ const bool write_limit_exceeded =
+ bytes_written > MAX_BYTES_WRITTEN_EXTENDED ||
+ (bytes_written > MAX_BYTES_WRITTEN && !Settings::values.extended_logging);
- if (Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN_EXTENDED) {
- return;
- } else if (!Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN) {
+ // Close the file after the write limit is exceeded.
+ if (write_limit_exceeded) {
+ file->Close();
return;
}
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 19b970981..b2b0dbe05 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -667,8 +667,6 @@ else()
target_compile_options(core PRIVATE
-Werror=conversion
-Werror=ignored-qualifiers
- -Werror=implicit-fallthrough
- -Werror=sign-compare
-Werror=shadow
$<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess>
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 7583d68b2..b769fe959 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -290,7 +290,7 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
- rb.Push<u32>(1);
+ rb.Push<u32>(2);
}
// Should be similar to QueryAudioDeviceOutputEvent
diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt
index c3423c815..c4283a952 100644
--- a/src/input_common/CMakeLists.txt
+++ b/src/input_common/CMakeLists.txt
@@ -44,10 +44,7 @@ else()
-Werror
-Werror=conversion
-Werror=ignored-qualifiers
- -Werror=implicit-fallthrough
- -Werror=reorder
-Werror=shadow
- -Werror=sign-compare
$<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
$<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
-Werror=unused-variable
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 7124c755c..d2b9d5f2b 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -69,11 +69,16 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
} else {
UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
}
- // Flush and invalidate through the GPU interface, to be asynchronous if possible.
- const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr);
- rasterizer->UnmapMemory(*cpu_addr, size);
+ const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
+
+ for (const auto& map : submapped_ranges) {
+ // Flush and invalidate through the GPU interface, to be asynchronous if possible.
+ const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map.first);
+ ASSERT(cpu_addr);
+
+ rasterizer->UnmapMemory(*cpu_addr, map.second);
+ }
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
}
@@ -127,8 +132,14 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
//// Lock the new page
// TryLockPage(page_entry, size);
+ auto& current_page = page_table[PageEntryIndex(gpu_addr)];
- page_table[PageEntryIndex(gpu_addr)] = page_entry;
+ if ((!current_page.IsValid() && page_entry.IsValid()) ||
+ current_page.ToAddress() != page_entry.ToAddress()) {
+ rasterizer->ModifyGPUMemory(gpu_addr, size);
+ }
+
+ current_page = page_entry;
}
std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
@@ -174,6 +185,19 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
return page_entry.ToAddress() + (gpu_addr & page_mask);
}
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
+ size_t page_index{addr >> page_bits};
+ const size_t page_last{(addr + size + page_size - 1) >> page_bits};
+ while (page_index < page_last) {
+ const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+ if (page_addr && *page_addr != 0) {
+ return page_addr;
+ }
+ ++page_index;
+ }
+ return std::nullopt;
+}
+
template <typename T>
T MemoryManager::Read(GPUVAddr addr) const {
if (auto page_pointer{GetPointer(addr)}; page_pointer) {
@@ -370,4 +394,79 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
return page <= Core::Memory::PAGE_SIZE;
}
+bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
+ size_t page_index{gpu_addr >> page_bits};
+ const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
+ std::optional<VAddr> old_page_addr{};
+ while (page_index != page_last) {
+ const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+ if (!page_addr || *page_addr == 0) {
+ return false;
+ }
+ if (old_page_addr) {
+ if (*old_page_addr + page_size != *page_addr) {
+ return false;
+ }
+ }
+ old_page_addr = page_addr;
+ ++page_index;
+ }
+ return true;
+}
+
+bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const {
+ size_t page_index{gpu_addr >> page_bits};
+ const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
+ while (page_index < page_last) {
+ if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) {
+ return false;
+ }
+ ++page_index;
+ }
+ return true;
+}
+
+std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
+ GPUVAddr gpu_addr, std::size_t size) const {
+ std::vector<std::pair<GPUVAddr, std::size_t>> result{};
+ size_t page_index{gpu_addr >> page_bits};
+ size_t remaining_size{size};
+ size_t page_offset{gpu_addr & page_mask};
+ std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
+ std::optional<VAddr> old_page_addr{};
+ const auto extend_size = [this, &last_segment, &page_index](std::size_t bytes) {
+ if (!last_segment) {
+ GPUVAddr new_base_addr = page_index << page_bits;
+ last_segment = {new_base_addr, bytes};
+ } else {
+ last_segment->second += bytes;
+ }
+ };
+ const auto split = [this, &last_segment, &result] {
+ if (last_segment) {
+ result.push_back(*last_segment);
+ last_segment = std::nullopt;
+ }
+ };
+ while (remaining_size > 0) {
+ const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
+ const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+ if (!page_addr) {
+ split();
+ } else if (old_page_addr) {
+ if (*old_page_addr + page_size != *page_addr) {
+ split();
+ }
+ extend_size(num_bytes);
+ } else {
+ extend_size(num_bytes);
+ }
+ ++page_index;
+ page_offset = 0;
+ remaining_size -= num_bytes;
+ }
+ split();
+ return result;
+}
+
} // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index b3538d503..99d13e7f6 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -76,6 +76,8 @@ public:
[[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
+ [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const;
+
template <typename T>
[[nodiscard]] T Read(GPUVAddr addr) const;
@@ -112,10 +114,28 @@ public:
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
/**
- * IsGranularRange checks if a gpu region can be simply read with a pointer.
+ * Checks if a gpu region can be simply read with a pointer.
*/
[[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
+ /**
+ * Checks if a gpu region is mapped by a single range of cpu addresses.
+ */
+ [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
+
+ /**
+ * Checks if a gpu region is mapped entirely.
+ */
+ [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const;
+
+ /**
+ * Returns a vector with all the subranges of cpu addresses mapped beneath.
+ * if the region is continous, a single pair will be returned. If it's unmapped, an empty vector
+ * will be returned;
+ */
+ std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
+ std::size_t size) const;
+
[[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
[[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
[[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 07939432f..0cec4225b 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -87,6 +87,9 @@ public:
/// Unmap memory range
virtual void UnmapMemory(VAddr addr, u64 size) = 0;
+ /// Remap GPU memory range. This means underneath backing memory changed
+ virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
+
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index eb8bdaa85..07ad0e205 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -611,6 +611,13 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
shader_cache.OnCPUWrite(addr, size);
}
+void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.UnmapGPUMemory(addr, size);
+ }
+}
+
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
if (!gpu.IsAsync()) {
gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9995a563b..482efed7a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -80,6 +80,7 @@ public:
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void UnmapMemory(VAddr addr, u64 size) override;
+ void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index abaf1ee6a..8fb5be393 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -261,9 +261,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
- copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
+ copy.src_subresource.base_level, GL_TRUE, 0, GL_READ_ONLY, GL_RG32UI);
glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
- copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
+ copy.dst_subresource.base_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
}
program_manager.RestoreGuestCompute();
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 1c9120170..bd4d649cc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -557,6 +557,13 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
pipeline_cache.OnCPUWrite(addr, size);
}
+void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.UnmapGPUMemory(addr, size);
+ }
+}
+
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
if (!gpu.IsAsync()) {
gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index cb8c5c279..41459c5c5 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -72,6 +72,7 @@ public:
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void UnmapMemory(VAddr addr, u64 size) override;
+ void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index f22358c90..6052d148a 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -69,6 +69,9 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_
}
}
+ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_)
+ : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {}
+
std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
if (other_addr < gpu_addr) {
// Subresource address can't be lower than the base
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index e326cab71..ff1feda9b 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -25,12 +25,14 @@ enum class ImageFlagBits : u32 {
Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted
Registered = 1 << 6, ///< True when the image is registered
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
+ Remapped = 1 << 8, ///< Image has been remapped.
+ Sparse = 1 << 9, ///< Image has non continous submemory.
// Garbage Collection Flags
- BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher
- ///< garbage collection priority
- Alias = 1 << 9, ///< This image has aliases and has priority on garbage
- ///< collection
+ BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher
+ ///< garbage collection priority
+ Alias = 1 << 11, ///< This image has aliases and has priority on garbage
+ ///< collection
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
@@ -57,6 +59,12 @@ struct ImageBase {
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
}
+ [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
+ const VAddr overlap_end = overlap_gpu_addr + overlap_size;
+ const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes;
+ return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
+ }
+
void CheckBadOverlapState();
void CheckAliasState();
@@ -84,6 +92,29 @@ struct ImageBase {
std::vector<AliasedImage> aliased_images;
std::vector<ImageId> overlapping_images;
+ ImageMapId map_view_id{};
+};
+
+struct ImageMapView {
+ explicit ImageMapView(GPUVAddr gpu_addr, VAddr cpu_addr, size_t size, ImageId image_id);
+
+ [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
+ const VAddr overlap_end = overlap_cpu_addr + overlap_size;
+ const VAddr cpu_addr_end = cpu_addr + size;
+ return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
+ }
+
+ [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
+ const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size;
+ const GPUVAddr gpu_addr_end = gpu_addr + size;
+ return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
+ }
+
+ GPUVAddr gpu_addr;
+ VAddr cpu_addr;
+ size_t size;
+ ImageId image_id;
+ bool picked{};
};
struct ImageAllocBase {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d8dbd3824..e3542301e 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -13,6 +13,7 @@
#include <span>
#include <type_traits>
#include <unordered_map>
+#include <unordered_set>
#include <utility>
#include <vector>
@@ -152,6 +153,9 @@ public:
/// Remove images in a region
void UnmapMemory(VAddr cpu_addr, size_t size);
+ /// Remove images in a region
+ void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
+
/// Blit an image with the given parameters
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
@@ -190,7 +194,22 @@ public:
private:
/// Iterate over all page indices in a range
template <typename Func>
- static void ForEachPage(VAddr addr, size_t size, Func&& func) {
+ static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
+ static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
+ const u64 page_end = (addr + size - 1) >> PAGE_BITS;
+ for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
+ if constexpr (RETURNS_BOOL) {
+ if (func(page)) {
+ break;
+ }
+ } else {
+ func(page);
+ }
+ }
+ }
+
+ template <typename Func>
+ static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
const u64 page_end = (addr + size - 1) >> PAGE_BITS;
for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
@@ -220,7 +239,7 @@ private:
FramebufferId GetFramebufferId(const RenderTargets& key);
/// Refresh the contents (pixel data) of an image
- void RefreshContents(Image& image);
+ void RefreshContents(Image& image, ImageId image_id);
/// Upload data from guest to an image
template <typename StagingBuffer>
@@ -269,6 +288,16 @@ private:
template <typename Func>
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
+ template <typename Func>
+ void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
+
+ template <typename Func>
+ void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
+
+ /// Iterates over all the images in a region calling func
+ template <typename Func>
+ void ForEachSparseSegment(ImageBase& image, Func&& func);
+
/// Find or create an image view in the given image with the passed parameters
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
@@ -279,10 +308,10 @@ private:
void UnregisterImage(ImageId image);
/// Track CPU reads and writes for image
- void TrackImage(ImageBase& image);
+ void TrackImage(ImageBase& image, ImageId image_id);
/// Stop tracking CPU reads and writes for image
- void UntrackImage(ImageBase& image);
+ void UntrackImage(ImageBase& image, ImageId image_id);
/// Delete image from the cache
void DeleteImage(ImageId image);
@@ -340,7 +369,13 @@ private:
std::unordered_map<TSCEntry, SamplerId> samplers;
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
- std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
+ std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
+ std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
+ std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
+
+ std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
+
+ VAddr virtual_invalid_space{};
bool has_deleted_images = false;
u64 total_used_memory = 0;
@@ -349,6 +384,7 @@ private:
u64 critical_memory;
SlotVector<Image> slot_images;
+ SlotVector<ImageMapView> slot_map_views;
SlotVector<ImageView> slot_image_views;
SlotVector<ImageAlloc> slot_image_allocs;
SlotVector<Sampler> slot_samplers;
@@ -459,7 +495,7 @@ void TextureCache<P>::RunGarbageCollector() {
}
}
if (True(image->flags & ImageFlagBits::Tracked)) {
- UntrackImage(*image);
+ UntrackImage(*image, image_id);
}
UnregisterImage(image_id);
DeleteImage(image_id);
@@ -658,7 +694,9 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
return;
}
image.flags |= ImageFlagBits::CpuModified;
- UntrackImage(image);
+ if (True(image.flags & ImageFlagBits::Tracked)) {
+ UntrackImage(image, image_id);
+ }
});
}
@@ -695,7 +733,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
if (True(image.flags & ImageFlagBits::Tracked)) {
- UntrackImage(image);
+ UntrackImage(image, id);
}
UnregisterImage(id);
DeleteImage(id);
@@ -703,6 +741,23 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
}
template <class P>
+void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
+ std::vector<ImageId> deleted_images;
+ ForEachImageInRegionGPU(gpu_addr, size,
+ [&](ImageId id, Image&) { deleted_images.push_back(id); });
+ for (const ImageId id : deleted_images) {
+ Image& image = slot_images[id];
+ if (True(image.flags & ImageFlagBits::Remapped)) {
+ continue;
+ }
+ image.flags |= ImageFlagBits::Remapped;
+ if (True(image.flags & ImageFlagBits::Tracked)) {
+ UntrackImage(image, id);
+ }
+ }
+}
+
+template <class P>
void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy,
@@ -833,9 +888,10 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_ad
if (it == page_table.end()) {
return nullptr;
}
- const auto& image_ids = it->second;
- for (const ImageId image_id : image_ids) {
- const ImageBase& image = slot_images[image_id];
+ const auto& image_map_ids = it->second;
+ for (const ImageMapId map_id : image_map_ids) {
+ const ImageMapView& map = slot_map_views[map_id];
+ const ImageBase& image = slot_images[map.image_id];
if (image.cpu_addr != cpu_addr) {
continue;
}
@@ -915,13 +971,13 @@ bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
}
template <class P>
-void TextureCache<P>::RefreshContents(Image& image) {
+void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
if (False(image.flags & ImageFlagBits::CpuModified)) {
// Only upload modified images
return;
}
image.flags &= ~ImageFlagBits::CpuModified;
- TrackImage(image);
+ TrackImage(image, image_id);
if (image.info.num_samples > 1) {
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
@@ -958,7 +1014,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
template <class P>
ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
- if (!IsValidAddress(gpu_memory, config)) {
+ if (!IsValidEntry(gpu_memory, config)) {
return NULL_IMAGE_VIEW_ID;
}
const auto [pair, is_new] = image_views.try_emplace(config);
@@ -1000,14 +1056,20 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
template <class P>
ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) {
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
- return ImageId{};
+ cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
+ if (!cpu_addr) {
+ return ImageId{};
+ }
}
const bool broken_views = runtime.HasBrokenTextureViewFormats();
const bool native_bgr = runtime.HasNativeBgr();
ImageId image_id;
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
+ if (True(existing_image.flags & ImageFlagBits::Remapped)) {
+ return false;
+ }
if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
const bool strict_size = False(options & RelaxedOptions::Size) &&
True(existing_image.flags & ImageFlagBits::Strong);
@@ -1033,7 +1095,16 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
template <class P>
ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) {
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr) {
+ const auto size = CalculateGuestSizeInBytes(info);
+ cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
+ if (!cpu_addr) {
+ const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
+ virtual_invalid_space += Common::AlignUp(size, 32);
+ cpu_addr = std::optional<VAddr>(fake_addr);
+ }
+ }
ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
const Image& image = slot_images[image_id];
@@ -1053,10 +1124,16 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
const bool broken_views = runtime.HasBrokenTextureViewFormats();
const bool native_bgr = runtime.HasNativeBgr();
std::vector<ImageId> overlap_ids;
+ std::unordered_set<ImageId> overlaps_found;
std::vector<ImageId> left_aliased_ids;
std::vector<ImageId> right_aliased_ids;
+ std::unordered_set<ImageId> ignore_textures;
std::vector<ImageId> bad_overlap_ids;
- ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
+ const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
+ if (True(overlap.flags & ImageFlagBits::Remapped)) {
+ ignore_textures.insert(overlap_id);
+ return;
+ }
if (info.type == ImageType::Linear) {
if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
// Alias linear images with the same pitch
@@ -1064,6 +1141,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
}
return;
}
+ overlaps_found.insert(overlap_id);
static constexpr bool strict_size = true;
const std::optional<OverlapResult> solution = ResolveOverlap(
new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
@@ -1087,12 +1165,40 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
bad_overlap_ids.push_back(overlap_id);
overlap.flags |= ImageFlagBits::BadOverlap;
}
- });
+ };
+ ForEachImageInRegion(cpu_addr, size_bytes, region_check);
+ const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
+ if (!overlaps_found.contains(overlap_id)) {
+ if (True(overlap.flags & ImageFlagBits::Remapped)) {
+ ignore_textures.insert(overlap_id);
+ }
+ if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
+ ignore_textures.insert(overlap_id);
+ }
+ }
+ };
+ ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
Image& new_image = slot_images[new_image_id];
+ if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
+ new_image.flags |= ImageFlagBits::Sparse;
+ }
+
+ for (const ImageId overlap_id : ignore_textures) {
+ Image& overlap = slot_images[overlap_id];
+ if (True(overlap.flags & ImageFlagBits::GpuModified)) {
+ UNIMPLEMENTED();
+ }
+ if (True(overlap.flags & ImageFlagBits::Tracked)) {
+ UntrackImage(overlap, overlap_id);
+ }
+ UnregisterImage(overlap_id);
+ DeleteImage(overlap_id);
+ }
+
// TODO: Only upload what we need
- RefreshContents(new_image);
+ RefreshContents(new_image, new_image_id);
for (const ImageId overlap_id : overlap_ids) {
Image& overlap = slot_images[overlap_id];
@@ -1104,7 +1210,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
runtime.CopyImage(new_image, overlap, copies);
}
if (True(overlap.flags & ImageFlagBits::Tracked)) {
- UntrackImage(overlap);
+ UntrackImage(overlap, overlap_id);
}
UnregisterImage(overlap_id);
DeleteImage(overlap_id);
@@ -1239,7 +1345,8 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
boost::container::small_vector<ImageId, 32> images;
- ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
+ boost::container::small_vector<ImageMapId, 32> maps;
+ ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) {
const auto it = page_table.find(page);
if (it == page_table.end()) {
if constexpr (BOOL_BREAK) {
@@ -1248,12 +1355,105 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
return;
}
}
+ for (const ImageMapId map_id : it->second) {
+ ImageMapView& map = slot_map_views[map_id];
+ if (map.picked) {
+ continue;
+ }
+ if (!map.Overlaps(cpu_addr, size)) {
+ continue;
+ }
+ map.picked = true;
+ maps.push_back(map_id);
+ Image& image = slot_images[map.image_id];
+ if (True(image.flags & ImageFlagBits::Picked)) {
+ continue;
+ }
+ image.flags |= ImageFlagBits::Picked;
+ images.push_back(map.image_id);
+ if constexpr (BOOL_BREAK) {
+ if (func(map.image_id, image)) {
+ return true;
+ }
+ } else {
+ func(map.image_id, image);
+ }
+ }
+ if constexpr (BOOL_BREAK) {
+ return false;
+ }
+ });
+ for (const ImageId image_id : images) {
+ slot_images[image_id].flags &= ~ImageFlagBits::Picked;
+ }
+ for (const ImageMapId map_id : maps) {
+ slot_map_views[map_id].picked = false;
+ }
+}
+
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
+ using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+ static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+ boost::container::small_vector<ImageId, 8> images;
+ ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
+ const auto it = gpu_page_table.find(page);
+ if (it == gpu_page_table.end()) {
+ if constexpr (BOOL_BREAK) {
+ return false;
+ } else {
+ return;
+ }
+ }
+ for (const ImageId image_id : it->second) {
+ Image& image = slot_images[image_id];
+ if (True(image.flags & ImageFlagBits::Picked)) {
+ continue;
+ }
+ if (!image.OverlapsGPU(gpu_addr, size)) {
+ continue;
+ }
+ image.flags |= ImageFlagBits::Picked;
+ images.push_back(image_id);
+ if constexpr (BOOL_BREAK) {
+ if (func(image_id, image)) {
+ return true;
+ }
+ } else {
+ func(image_id, image);
+ }
+ }
+ if constexpr (BOOL_BREAK) {
+ return false;
+ }
+ });
+ for (const ImageId image_id : images) {
+ slot_images[image_id].flags &= ~ImageFlagBits::Picked;
+ }
+}
+
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
+ using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+ static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+ boost::container::small_vector<ImageId, 8> images;
+ ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
+ const auto it = sparse_page_table.find(page);
+ if (it == sparse_page_table.end()) {
+ if constexpr (BOOL_BREAK) {
+ return false;
+ } else {
+ return;
+ }
+ }
for (const ImageId image_id : it->second) {
Image& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::Picked)) {
continue;
}
- if (!image.Overlaps(cpu_addr, size)) {
+ if (!image.OverlapsGPU(gpu_addr, size)) {
continue;
}
image.flags |= ImageFlagBits::Picked;
@@ -1276,6 +1476,27 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
}
template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
+ using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
+ static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
+ const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
+ for (auto& segment : segments) {
+ const auto gpu_addr = segment.first;
+ const auto size = segment.second;
+ std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ ASSERT(cpu_addr);
+ if constexpr (RETURNS_BOOL) {
+ if (func(gpu_addr, *cpu_addr, size)) {
+ break;
+ }
+ } else {
+ func(gpu_addr, *cpu_addr, size);
+ }
+ }
+}
+
+template <class P>
ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
Image& image = slot_images[image_id];
if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
@@ -1292,8 +1513,6 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
"Trying to register an already registered image");
image.flags |= ImageFlagBits::Registered;
- ForEachPage(image.cpu_addr, image.guest_size_bytes,
- [this, image_id](u64 page) { page_table[page].push_back(image_id); });
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
if ((IsPixelFormatASTC(image.info.format) &&
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
@@ -1301,6 +1520,27 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory += Common::AlignUp(tentative_size, 1024);
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+ [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
+ if (False(image.flags & ImageFlagBits::Sparse)) {
+ auto map_id =
+ slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
+ ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
+ [this, map_id](u64 page) { page_table[page].push_back(map_id); });
+ image.map_view_id = map_id;
+ return;
+ }
+ std::vector<ImageViewId> sparse_maps{};
+ ForEachSparseSegment(
+ image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
+ auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
+ ForEachCPUPage(cpu_addr, size,
+ [this, map_id](u64 page) { page_table[page].push_back(map_id); });
+ sparse_maps.push_back(map_id);
+ });
+ sparse_views.emplace(image_id, std::move(sparse_maps));
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+ [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
}
template <class P>
@@ -1317,34 +1557,125 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory -= Common::AlignUp(tentative_size, 1024);
- ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
- const auto page_it = page_table.find(page);
- if (page_it == page_table.end()) {
- UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
- return;
- }
- std::vector<ImageId>& image_ids = page_it->second;
- const auto vector_it = std::ranges::find(image_ids, image_id);
- if (vector_it == image_ids.end()) {
- UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS);
- return;
- }
- image_ids.erase(vector_it);
+ const auto& clear_page_table =
+ [this, image_id](
+ u64 page,
+ std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
+ const auto page_it = selected_page_table.find(page);
+ if (page_it == selected_page_table.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+ return;
+ }
+ std::vector<ImageId>& image_ids = page_it->second;
+ const auto vector_it = std::ranges::find(image_ids, image_id);
+ if (vector_it == image_ids.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
+ page << PAGE_BITS);
+ return;
+ }
+ image_ids.erase(vector_it);
+ };
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+ [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
+ if (False(image.flags & ImageFlagBits::Sparse)) {
+ const auto map_id = image.map_view_id;
+ ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
+ const auto page_it = page_table.find(page);
+ if (page_it == page_table.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+ return;
+ }
+ std::vector<ImageMapId>& image_map_ids = page_it->second;
+ const auto vector_it = std::ranges::find(image_map_ids, map_id);
+ if (vector_it == image_map_ids.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
+ page << PAGE_BITS);
+ return;
+ }
+ image_map_ids.erase(vector_it);
+ });
+ slot_map_views.erase(map_id);
+ return;
+ }
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
+ clear_page_table(page, sparse_page_table);
});
+ auto it = sparse_views.find(image_id);
+ ASSERT(it != sparse_views.end());
+ auto& sparse_maps = it->second;
+ for (auto& map_view_id : sparse_maps) {
+ const auto& map_range = slot_map_views[map_view_id];
+ const VAddr cpu_addr = map_range.cpu_addr;
+ const std::size_t size = map_range.size;
+ ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
+ const auto page_it = page_table.find(page);
+ if (page_it == page_table.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+ return;
+ }
+ std::vector<ImageMapId>& image_map_ids = page_it->second;
+ auto vector_it = image_map_ids.begin();
+ while (vector_it != image_map_ids.end()) {
+ ImageMapView& map = slot_map_views[*vector_it];
+ if (map.image_id != image_id) {
+ vector_it++;
+ continue;
+ }
+ if (!map.picked) {
+ map.picked = true;
+ }
+ vector_it = image_map_ids.erase(vector_it);
+ }
+ });
+ slot_map_views.erase(map_view_id);
+ }
+ sparse_views.erase(it);
}
template <class P>
-void TextureCache<P>::TrackImage(ImageBase& image) {
+void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
ASSERT(False(image.flags & ImageFlagBits::Tracked));
image.flags |= ImageFlagBits::Tracked;
- rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+ if (False(image.flags & ImageFlagBits::Sparse)) {
+ rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+ return;
+ }
+ if (True(image.flags & ImageFlagBits::Registered)) {
+ auto it = sparse_views.find(image_id);
+ ASSERT(it != sparse_views.end());
+ auto& sparse_maps = it->second;
+ for (auto& map_view_id : sparse_maps) {
+ const auto& map = slot_map_views[map_view_id];
+ const VAddr cpu_addr = map.cpu_addr;
+ const std::size_t size = map.size;
+ rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+ }
+ return;
+ }
+ ForEachSparseSegment(image,
+ [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
+ rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+ });
}
template <class P>
-void TextureCache<P>::UntrackImage(ImageBase& image) {
+void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
ASSERT(True(image.flags & ImageFlagBits::Tracked));
image.flags &= ~ImageFlagBits::Tracked;
- rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+ if (False(image.flags & ImageFlagBits::Sparse)) {
+ rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+ return;
+ }
+ ASSERT(True(image.flags & ImageFlagBits::Registered));
+ auto it = sparse_views.find(image_id);
+ ASSERT(it != sparse_views.end());
+ auto& sparse_maps = it->second;
+ for (auto& map_view_id : sparse_maps) {
+ const auto& map = slot_map_views[map_view_id];
+ const VAddr cpu_addr = map.cpu_addr;
+ const std::size_t size = map.size;
+ rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
+ }
}
template <class P>
@@ -1486,10 +1817,10 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
if (invalidate) {
image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
if (False(image.flags & ImageFlagBits::Tracked)) {
- TrackImage(image);
+ TrackImage(image, image_id);
}
} else {
- RefreshContents(image);
+ RefreshContents(image, image_id);
SynchronizeAliases(image_id);
}
if (is_modification) {
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index c9571f7e4..9fbdc1ac6 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -16,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14;
constexpr SlotId CORRUPT_ID{0xfffffffe};
using ImageId = SlotId;
+using ImageMapId = SlotId;
using ImageViewId = SlotId;
using ImageAllocId = SlotId;
using SamplerId = SlotId;
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 20794fa32..c872517b8 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -664,6 +664,16 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
return offsets;
}
+LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
+ const u32 num_levels = info.resources.levels;
+ const LevelInfo level_info = MakeLevelInfo(info);
+ LevelArray sizes{};
+ for (u32 level = 0; level < num_levels; ++level) {
+ sizes[level] = CalculateLevelSize(level_info, level);
+ }
+ return sizes;
+}
+
std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
ASSERT(info.type == ImageType::e3D);
std::vector<u32> offsets;
@@ -776,14 +786,20 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
return copies;
}
-bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
- if (config.Address() == 0) {
+bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
+ const GPUVAddr address = config.Address();
+ if (address == 0) {
return false;
}
- if (config.Address() > (u64(1) << 48)) {
+ if (address > (1ULL << 48)) {
return false;
}
- return gpu_memory.GpuToCpuAddress(config.Address()).has_value();
+ if (gpu_memory.GpuToCpuAddress(address).has_value()) {
+ return true;
+ }
+ const ImageInfo info{config};
+ const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
+ return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
}
std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index cdc5cbc75..766502908 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -40,6 +40,8 @@ struct OverlapResult {
[[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept;
+[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
+
[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
@@ -55,7 +57,7 @@ struct OverlapResult {
const ImageInfo& src,
SubresourceBase base);
-[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
+[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
GPUVAddr gpu_addr, const ImageInfo& info,