summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/CMakeLists.txt 17
-rw-r--r-- src/audio_core/CMakeLists.txt 3
-rw-r--r-- src/audio_core/audio_renderer.cpp 92
-rw-r--r-- src/audio_core/audio_renderer.h 6
-rw-r--r-- src/audio_core/command_generator.cpp 216
-rw-r--r-- src/audio_core/command_generator.h 32
-rw-r--r-- src/audio_core/common.h 2
-rw-r--r-- src/audio_core/info_updater.cpp 3
-rw-r--r-- src/audio_core/sink_context.cpp 15
-rw-r--r-- src/audio_core/sink_context.h 2
-rw-r--r-- src/audio_core/voice_context.cpp 88
-rw-r--r-- src/audio_core/voice_context.h 13
-rw-r--r-- src/common/fs/file.cpp 29
-rw-r--r-- src/common/fs/file.h 11
-rw-r--r-- src/common/logging/backend.cpp 19
-rw-r--r-- src/core/CMakeLists.txt 11
-rw-r--r-- src/core/file_sys/patch_manager.cpp 24
-rw-r--r-- src/core/file_sys/patch_manager.h 3
-rw-r--r-- src/core/file_sys/sdmc_factory.cpp 31
-rw-r--r-- src/core/file_sys/sdmc_factory.h 6
-rw-r--r-- src/core/hle/ipc_helpers.h 8
-rw-r--r-- src/core/hle/service/aoc/aoc_u.cpp 10
-rw-r--r-- src/core/hle/service/aoc/aoc_u.h 1
-rw-r--r-- src/core/hle/service/audio/audren_u.cpp 14
-rw-r--r-- src/core/hle/service/audio/hwopus.cpp 45
-rw-r--r-- src/core/hle/service/audio/hwopus.h 4
-rw-r--r-- src/core/hle/service/filesystem/filesystem.cpp 24
-rw-r--r-- src/core/hle/service/filesystem/filesystem.h 1
-rw-r--r-- src/core/hle/service/mii/manager.cpp 5
-rw-r--r-- src/input_common/CMakeLists.txt 19
-rwxr-xr-x src/input_common/analog_from_button.cpp 1
-rw-r--r-- src/input_common/gcadapter/gc_adapter.cpp 7
-rw-r--r-- src/input_common/udp/protocol.h 7
-rw-r--r-- src/video_core/CMakeLists.txt 4
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h 11
-rw-r--r-- src/video_core/cdma_pusher.cpp 3
-rw-r--r-- src/video_core/command_classes/codecs/codec.cpp 85
-rw-r--r-- src/video_core/command_classes/codecs/codec.h 12
-rw-r--r-- src/video_core/command_classes/codecs/h264.cpp 207
-rw-r--r-- src/video_core/command_classes/codecs/h264.h 132
-rw-r--r-- src/video_core/command_classes/codecs/vp9.cpp 4
-rw-r--r-- src/video_core/command_classes/codecs/vp9_types.h 307
-rw-r--r-- src/video_core/command_classes/nvdec.cpp 17
-rw-r--r-- src/video_core/command_classes/nvdec.h 8
-rw-r--r-- src/video_core/command_classes/nvdec_common.h 103
-rw-r--r-- src/video_core/command_classes/vic.cpp 18
-rw-r--r-- src/video_core/engines/fermi_2d.cpp 22
-rw-r--r-- src/video_core/host_shaders/CMakeLists.txt 5
-rw-r--r-- src/video_core/memory_manager.cpp 109
-rw-r--r-- src/video_core/memory_manager.h 22
-rw-r--r-- src/video_core/rasterizer_interface.h 3
-rw-r--r-- src/video_core/renderer_base.h 2
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp 58
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h 3
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 7
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 1
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp 29
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.h 4
-rw-r--r-- src/video_core/renderer_opengl/util_shaders.cpp 4
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.h 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp 5
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp 7
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.h 1
-rw-r--r-- src/video_core/texture_cache/image_base.cpp 5
-rw-r--r-- src/video_core/texture_cache/image_base.h 39
-rw-r--r-- src/video_core/texture_cache/texture_cache.h 495
-rw-r--r-- src/video_core/texture_cache/types.h 1
-rw-r--r-- src/video_core/texture_cache/util.cpp 26
-rw-r--r-- src/video_core/texture_cache/util.h 4
-rw-r--r-- src/video_core/textures/astc.cpp 4
-rw-r--r-- src/video_core/vulkan_common/vulkan_device.cpp 21
-rw-r--r-- src/video_core/vulkan_common/vulkan_device.h 3
-rw-r--r-- src/yuzu/debugger/profiler.cpp 9
-rw-r--r-- src/yuzu/game_list.cpp 12
-rw-r--r-- src/yuzu/game_list.h 7
-rw-r--r-- src/yuzu/main.cpp 25
-rw-r--r-- src/yuzu/main.h 7
77 files changed, 1841 insertions, 783 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f30dd49a3..f8ec8fea8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -45,13 +45,23 @@ if (MSVC)
/Zc:inline
/Zc:throwingNew
+ # External headers diagnostics
+ /experimental:external # Enables the external headers options. This option isn't required in Visual Studio 2019 version 16.10 and later
+ /external:anglebrackets # Treats all headers included by #include <header>, where the header file is enclosed in angle brackets (< >), as external headers
+ /external:W0 # Sets the default warning level to 0 for external headers, effectively turning off warnings for external headers
+
# Warnings
/W3
- /we4062 # enumerator 'identifier' in a switch of enum 'enumeration' is not handled
+ /we4018 # 'expression': signed/unsigned mismatch
+ /we4062 # Enumerator 'identifier' in a switch of enum 'enumeration' is not handled
/we4101 # 'identifier': unreferenced local variable
+ /we4189 # 'identifier': local variable is initialized but not referenced
/we4265 # 'class': class has virtual functions, but destructor is not virtual
- /we4388 # signed/unsigned mismatch
- /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect
+ /we4267 # 'var': conversion from 'size_t' to 'type', possible loss of data
+ /we4305 # 'context': truncation from 'type1' to 'type2'
+ /we4388 # 'expression': signed/unsigned mismatch
+ /we4389 # 'operator': signed/unsigned mismatch
+ /we4547 # 'operator': operator before comma has no effect; expected operator with side-effect
/we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'?
/we4555 # Expression has no effect; expected expression with side-effect
/we4715 # 'function': not all control paths return a value
@@ -72,6 +82,7 @@ else()
-Werror=missing-declarations
-Werror=missing-field-initializers
-Werror=reorder
+ -Werror=sign-compare
-Werror=switch
-Werror=uninitialized
-Werror=unused-function
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index d25a1a645..090dd19b1 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -51,9 +51,6 @@ if (NOT MSVC)
target_compile_options(audio_core PRIVATE
-Werror=conversion
-Werror=ignored-qualifiers
- -Werror=implicit-fallthrough
- -Werror=reorder
- -Werror=sign-compare
-Werror=shadow
-Werror=unused-parameter
-Werror=unused-variable
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 80ffddb10..7dba739b4 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -12,6 +12,7 @@
#include "audio_core/voice_context.h"
#include "common/logging/log.h"
#include "common/settings.h"
+#include "core/core_timing.h"
#include "core/memory.h"
namespace {
@@ -28,10 +29,9 @@ namespace {
(static_cast<float>(r_channel) * r_mix_amount)));
}
-[[nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2(s16 fl_channel, s16 fr_channel,
- s16 fc_channel,
- [[maybe_unused]] s16 lf_channel,
- s16 bl_channel, s16 br_channel) {
+[[maybe_unused, nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2(
+ s16 fl_channel, s16 fr_channel, s16 fc_channel, [[maybe_unused]] s16 lf_channel, s16 bl_channel,
+ s16 br_channel) {
// Front channels are mixed 36.94%, Center channels are mixed to be 26.12% & the back channels
// are mixed to be 36.94%
@@ -56,11 +56,11 @@ namespace {
const std::array<float_le, 4>& coeff) {
const auto left =
static_cast<float>(fl_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] +
- static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[0];
+ static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[3];
const auto right =
static_cast<float>(fr_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] +
- static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[0];
+ static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[3];
return {ClampToS16(static_cast<s32>(left)), ClampToS16(static_cast<s32>(right))};
}
@@ -68,7 +68,9 @@ namespace {
} // namespace
namespace AudioCore {
-AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_,
+constexpr s32 NUM_BUFFERS = 2;
+
+AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& memory_,
AudioCommon::AudioRendererParameter params,
Stream::ReleaseCallback&& release_callback,
std::size_t instance_number)
@@ -77,7 +79,8 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
sink_context(params.sink_count), splitter_context(),
voices(params.voice_count), memory{memory_},
command_generator(worker_params, voice_context, mix_context, splitter_context, effect_context,
- memory) {
+ memory),
+ core_timing{core_timing_} {
behavior_info.SetUserRevision(params.revision);
splitter_context.Initialize(behavior_info, params.splitter_count,
params.num_splitter_send_channels);
@@ -86,16 +89,27 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
stream = audio_out->OpenStream(
core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS,
fmt::format("AudioRenderer-Instance{}", instance_number), std::move(release_callback));
- audio_out->StartStream(stream);
-
- QueueMixedBuffer(0);
- QueueMixedBuffer(1);
- QueueMixedBuffer(2);
- QueueMixedBuffer(3);
+ process_event = Core::Timing::CreateEvent(
+ fmt::format("AudioRenderer-Instance{}-Process", instance_number),
+ [this](std::uintptr_t, std::chrono::nanoseconds) { ReleaseAndQueueBuffers(); });
+ for (s32 i = 0; i < NUM_BUFFERS; ++i) {
+ QueueMixedBuffer(i);
+ }
}
AudioRenderer::~AudioRenderer() = default;
+ResultCode AudioRenderer::Start() {
+ audio_out->StartStream(stream);
+ ReleaseAndQueueBuffers();
+ return ResultSuccess;
+}
+
+ResultCode AudioRenderer::Stop() {
+ audio_out->StopStream(stream);
+ return ResultSuccess;
+}
+
u32 AudioRenderer::GetSampleRate() const {
return worker_params.sample_rate;
}
@@ -114,7 +128,7 @@ Stream::State AudioRenderer::GetStreamState() const {
ResultCode AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params,
std::vector<u8>& output_params) {
-
+ std::scoped_lock lock{mutex};
InfoUpdater info_updater{input_params, output_params, behavior_info};
if (!info_updater.UpdateBehaviorInfo(behavior_info)) {
@@ -194,9 +208,6 @@ ResultCode AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_param
LOG_ERROR(Audio, "Audio buffers were not consumed!");
return AudioCommon::Audren::ERR_INVALID_PARAMETERS;
}
-
- ReleaseAndQueueBuffers();
-
return ResultSuccess;
}
@@ -220,10 +231,8 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
command_generator.PostCommand();
// Base sample size
std::size_t BUFFER_SIZE{worker_params.sample_count};
- // Samples
- std::vector<s16> buffer(BUFFER_SIZE * stream->GetNumChannels());
- // Make sure to clear our samples
- std::memset(buffer.data(), 0, buffer.size() * sizeof(s16));
+ // Samples, making sure to clear
+ std::vector<s16> buffer(BUFFER_SIZE * stream->GetNumChannels(), 0);
if (sink_context.InUse()) {
const auto stream_channel_count = stream->GetNumChannels();
@@ -231,7 +240,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
const auto channel_count = buffer_offsets.size();
const auto& final_mix = mix_context.GetFinalMixInfo();
const auto& in_params = final_mix.GetInParams();
- std::vector<s32*> mix_buffers(channel_count);
+ std::vector<std::span<s32>> mix_buffers(channel_count);
for (std::size_t i = 0; i < channel_count; i++) {
mix_buffers[i] =
command_generator.GetMixBuffer(in_params.buffer_offset + buffer_offsets[i]);
@@ -284,18 +293,11 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
buffer[i * stream_channel_count + 0] = Mix2To1(fl_sample, fr_sample);
} else if (stream_channel_count == 2) {
// Mix all channels into 2 channels
- if (sink_context.HasDownMixingCoefficients()) {
- const auto [left, right] = Mix6To2WithCoefficients(
- fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample,
- sink_context.GetDownmixCoefficients());
- buffer[i * stream_channel_count + 0] = left;
- buffer[i * stream_channel_count + 1] = right;
- } else {
- const auto [left, right] = Mix6To2(fl_sample, fr_sample, fc_sample,
- lf_sample, bl_sample, br_sample);
- buffer[i * stream_channel_count + 0] = left;
- buffer[i * stream_channel_count + 1] = right;
- }
+ const auto [left, right] = Mix6To2WithCoefficients(
+ fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample,
+ sink_context.GetDownmixCoefficients());
+ buffer[i * stream_channel_count + 0] = left;
+ buffer[i * stream_channel_count + 1] = right;
} else if (stream_channel_count == 6) {
// Pass through
buffer[i * stream_channel_count + 0] = fl_sample;
@@ -315,10 +317,24 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
}
void AudioRenderer::ReleaseAndQueueBuffers() {
- const auto released_buffers{audio_out->GetTagsAndReleaseBuffers(stream)};
- for (const auto& tag : released_buffers) {
- QueueMixedBuffer(tag);
+ if (!stream->IsPlaying()) {
+ return;
}
+
+ {
+ std::scoped_lock lock{mutex};
+ const auto released_buffers{audio_out->GetTagsAndReleaseBuffers(stream)};
+ for (const auto& tag : released_buffers) {
+ QueueMixedBuffer(tag);
+ }
+ }
+
+ const f32 sample_rate = static_cast<f32>(GetSampleRate());
+ const f32 sample_count = static_cast<f32>(GetSampleCount());
+ const f32 consume_rate = sample_rate / (sample_count * (sample_count / 240));
+ const s32 ms = (1000 / static_cast<s32>(consume_rate)) - 1;
+ const std::chrono::milliseconds next_event_time(std::max(ms / NUM_BUFFERS, 1));
+ core_timing.ScheduleEvent(next_event_time, process_event, {});
}
} // namespace AudioCore
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 18567f618..88fdd13dd 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -6,6 +6,7 @@
#include <array>
#include <memory>
+#include <mutex>
#include <vector>
#include "audio_core/behavior_info.h"
@@ -45,6 +46,8 @@ public:
[[nodiscard]] ResultCode UpdateAudioRenderer(const std::vector<u8>& input_params,
std::vector<u8>& output_params);
+ [[nodiscard]] ResultCode Start();
+ [[nodiscard]] ResultCode Stop();
void QueueMixedBuffer(Buffer::Tag tag);
void ReleaseAndQueueBuffers();
[[nodiscard]] u32 GetSampleRate() const;
@@ -68,6 +71,9 @@ private:
Core::Memory::Memory& memory;
CommandGenerator command_generator;
std::size_t elapsed_frame_count{};
+ Core::Timing::CoreTiming& core_timing;
+ std::shared_ptr<Core::Timing::EventType> process_event;
+ std::mutex mutex;
};
} // namespace AudioCore
diff --git a/src/audio_core/command_generator.cpp b/src/audio_core/command_generator.cpp
index 437cc5ccd..b99d0fc91 100644
--- a/src/audio_core/command_generator.cpp
+++ b/src/audio_core/command_generator.cpp
@@ -31,7 +31,7 @@ constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_GAIN{
0.72867f, 0.69794f, 0.5464f, 0.24563f, 0.45214f, 0.44042f};
template <std::size_t N>
-void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) {
+void ApplyMix(std::span<s32> output, std::span<const s32> input, s32 gain, s32 sample_count) {
for (std::size_t i = 0; i < static_cast<std::size_t>(sample_count); i += N) {
for (std::size_t j = 0; j < N; j++) {
output[i + j] +=
@@ -40,7 +40,8 @@ void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) {
}
}
-s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sample_count) {
+s32 ApplyMixRamp(std::span<s32> output, std::span<const s32> input, float gain, float delta,
+ s32 sample_count) {
s32 x = 0;
for (s32 i = 0; i < sample_count; i++) {
x = static_cast<s32>(static_cast<float>(input[i]) * gain);
@@ -50,20 +51,22 @@ s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sam
return x;
}
-void ApplyGain(s32* output, const s32* input, s32 gain, s32 delta, s32 sample_count) {
+void ApplyGain(std::span<s32> output, std::span<const s32> input, s32 gain, s32 delta,
+ s32 sample_count) {
for (s32 i = 0; i < sample_count; i++) {
output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15);
gain += delta;
}
}
-void ApplyGainWithoutDelta(s32* output, const s32* input, s32 gain, s32 sample_count) {
+void ApplyGainWithoutDelta(std::span<s32> output, std::span<const s32> input, s32 gain,
+ s32 sample_count) {
for (s32 i = 0; i < sample_count; i++) {
output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15);
}
}
-s32 ApplyMixDepop(s32* output, s32 first_sample, s32 delta, s32 sample_count) {
+s32 ApplyMixDepop(std::span<s32> output, s32 first_sample, s32 delta, s32 sample_count) {
const bool positive = first_sample > 0;
auto final_sample = std::abs(first_sample);
for (s32 i = 0; i < sample_count; i++) {
@@ -128,10 +131,10 @@ constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_6CH{4, 0, 0, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
template <std::size_t CHANNEL_COUNT>
-void ApplyReverbGeneric(I3dl2ReverbState& state,
- const std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT>& input,
- const std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT>& output,
- s32 sample_count) {
+void ApplyReverbGeneric(
+ I3dl2ReverbState& state,
+ const std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT>& input,
+ const std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT>& output, s32 sample_count) {
auto GetTapLookup = []() {
if constexpr (CHANNEL_COUNT == 1) {
@@ -400,7 +403,10 @@ void CommandGenerator::GenerateDataSourceCommand(ServerVoiceInfo& voice_info, Vo
}
} else {
switch (in_params.sample_format) {
+ case SampleFormat::Pcm8:
case SampleFormat::Pcm16:
+ case SampleFormat::Pcm32:
+ case SampleFormat::PcmFloat:
DecodeFromWaveBuffers(voice_info, GetChannelMixBuffer(channel), dsp_state, channel,
worker_params.sample_rate, worker_params.sample_count,
in_params.node_id);
@@ -454,8 +460,8 @@ void CommandGenerator::GenerateBiquadFilterCommand([[maybe_unused]] s32 mix_buff
"input_mix_buffer={}, output_mix_buffer={}",
node_id, input_offset, output_offset);
}
- const auto* input = GetMixBuffer(input_offset);
- auto* output = GetMixBuffer(output_offset);
+ std::span<const s32> input = GetMixBuffer(input_offset);
+ std::span<s32> output = GetMixBuffer(output_offset);
// Biquad filter parameters
const auto [n0, n1, n2] = params.numerator;
@@ -548,8 +554,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E
return;
}
- std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT> input{};
- std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT> output{};
+ std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT> input{};
+ std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT> output{};
const auto status = params.status;
for (s32 i = 0; i < channel_count; i++) {
@@ -584,7 +590,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E
for (s32 i = 0; i < channel_count; i++) {
// Only copy if the buffer input and output do not match!
if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) {
- std::memcpy(output[i], input[i], worker_params.sample_count * sizeof(s32));
+ std::memcpy(output[i].data(), input[i].data(),
+ worker_params.sample_count * sizeof(s32));
}
}
}
@@ -600,8 +607,8 @@ void CommandGenerator::GenerateBiquadFilterEffectCommand(s32 mix_buffer_offset,
for (s32 i = 0; i < channel_count; i++) {
// TODO(ogniK): Actually implement biquad filter
if (params.input[i] != params.output[i]) {
- const auto* input = GetMixBuffer(mix_buffer_offset + params.input[i]);
- auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]);
+ std::span<const s32> input = GetMixBuffer(mix_buffer_offset + params.input[i]);
+ std::span<s32> output = GetMixBuffer(mix_buffer_offset + params.output[i]);
ApplyMix<1>(output, input, 32768, worker_params.sample_count);
}
}
@@ -640,14 +647,15 @@ void CommandGenerator::GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* inf
if (samples_read != static_cast<int>(worker_params.sample_count) &&
samples_read <= params.sample_count) {
- std::memset(GetMixBuffer(output_index), 0, params.sample_count - samples_read);
+ std::memset(GetMixBuffer(output_index).data(), 0,
+ params.sample_count - samples_read);
}
} else {
AuxInfoDSP empty{};
memory.WriteBlock(aux->GetSendInfo(), &empty, sizeof(AuxInfoDSP));
memory.WriteBlock(aux->GetRecvInfo(), &empty, sizeof(AuxInfoDSP));
if (output_index != input_index) {
- std::memcpy(GetMixBuffer(output_index), GetMixBuffer(input_index),
+ std::memcpy(GetMixBuffer(output_index).data(), GetMixBuffer(input_index).data(),
worker_params.sample_count * sizeof(s32));
}
}
@@ -665,7 +673,7 @@ ServerSplitterDestinationData* CommandGenerator::GetDestinationData(s32 splitter
}
s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples,
- const s32* data, u32 sample_count, u32 write_offset,
+ std::span<const s32> data, u32 sample_count, u32 write_offset,
u32 write_count) {
if (max_samples == 0) {
return 0;
@@ -675,14 +683,14 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3
return 0;
}
- std::size_t data_offset{};
+ s32 data_offset{};
u32 remaining = sample_count;
while (remaining > 0) {
// Get position in buffer
const auto base = send_buffer + (offset * sizeof(u32));
const auto samples_to_grab = std::min(max_samples - offset, remaining);
// Write to output
- memory.WriteBlock(base, (data + data_offset), samples_to_grab * sizeof(u32));
+ memory.WriteBlock(base, (data.data() + data_offset), samples_to_grab * sizeof(u32));
offset = (offset + samples_to_grab) % max_samples;
remaining -= samples_to_grab;
data_offset += samples_to_grab;
@@ -695,7 +703,7 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3
}
s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples,
- s32* out_data, u32 sample_count, u32 read_offset,
+ std::span<s32> out_data, u32 sample_count, u32 read_offset,
u32 read_count) {
if (max_samples == 0) {
return 0;
@@ -707,15 +715,16 @@ s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u3
}
u32 remaining = sample_count;
+ s32 data_offset{};
while (remaining > 0) {
const auto base = recv_buffer + (offset * sizeof(u32));
const auto samples_to_grab = std::min(max_samples - offset, remaining);
std::vector<s32> buffer(samples_to_grab);
memory.ReadBlock(base, buffer.data(), buffer.size() * sizeof(u32));
- std::memcpy(out_data, buffer.data(), buffer.size() * sizeof(u32));
- out_data += samples_to_grab;
+ std::memcpy(out_data.data() + data_offset, buffer.data(), buffer.size() * sizeof(u32));
offset = (offset + samples_to_grab) % max_samples;
remaining -= samples_to_grab;
+ data_offset += samples_to_grab;
}
if (read_count != 0) {
@@ -795,7 +804,7 @@ void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbSta
state.lowpass_1 = 0.0f;
} else {
const auto a = 1.0f - hf_gain;
- const auto b = 2.0f * (1.0f - hf_gain * CosD(256.0f * info.hf_reference /
+ const auto b = 2.0f * (2.0f - hf_gain * CosD(256.0f * info.hf_reference /
static_cast<f32>(info.sample_rate)));
const auto c = std::sqrt(b * b - 4.0f * a * a);
@@ -843,7 +852,7 @@ void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbSta
}
const auto max_early_delay = state.early_delay_line.GetMaxDelay();
- const auto reflection_time = 1000.0f * (0.0098f * info.reverb_delay + 0.02f);
+ const auto reflection_time = 1000.0f * (0.9998f * info.reverb_delay + 0.02f);
for (std::size_t tap = 0; tap < AudioCommon::I3DL2REVERB_TAPS; tap++) {
const auto length = AudioCommon::CalculateDelaySamples(
sample_rate, 1000.0f * info.reflection_delay + reflection_time * EARLY_TAP_TIMES[tap]);
@@ -962,8 +971,8 @@ void CommandGenerator::GenerateMixCommand(std::size_t output_offset, std::size_t
node_id, input_offset, output_offset, volume);
}
- auto* output = GetMixBuffer(output_offset);
- const auto* input = GetMixBuffer(input_offset);
+ std::span<s32> output = GetMixBuffer(output_offset);
+ std::span<const s32> input = GetMixBuffer(input_offset);
const s32 gain = static_cast<s32>(volume * 32768.0f);
// Mix with loop unrolling
@@ -1003,8 +1012,10 @@ void CommandGenerator::GenerateFinalMixCommand() {
}
}
-s32 CommandGenerator::DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state,
- s32 sample_count, s32 channel, std::size_t mix_offset) {
+template <typename T>
+s32 CommandGenerator::DecodePcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state,
+ s32 sample_start_offset, s32 sample_end_offset, s32 sample_count,
+ s32 channel, std::size_t mix_offset) {
const auto& in_params = voice_info.GetInParams();
const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index];
if (wave_buffer.buffer_address == 0) {
@@ -1013,39 +1024,50 @@ s32 CommandGenerator::DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_s
if (wave_buffer.buffer_size == 0) {
return 0;
}
- if (wave_buffer.end_sample_offset < wave_buffer.start_sample_offset) {
+ if (sample_end_offset < sample_start_offset) {
return 0;
}
- const auto samples_remaining =
- (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) - dsp_state.offset;
+ const auto samples_remaining = (sample_end_offset - sample_start_offset) - dsp_state.offset;
const auto start_offset =
- ((wave_buffer.start_sample_offset + dsp_state.offset) * in_params.channel_count) *
- sizeof(s16);
+ ((dsp_state.offset + sample_start_offset) * in_params.channel_count) * sizeof(T);
const auto buffer_pos = wave_buffer.buffer_address + start_offset;
const auto samples_processed = std::min(sample_count, samples_remaining);
- if (in_params.channel_count == 1) {
- std::vector<s16> buffer(samples_processed);
- memory.ReadBlock(buffer_pos, buffer.data(), buffer.size() * sizeof(s16));
- for (std::size_t i = 0; i < buffer.size(); i++) {
- sample_buffer[mix_offset + i] = buffer[i];
- }
- } else {
- const auto channel_count = in_params.channel_count;
- std::vector<s16> buffer(samples_processed * channel_count);
- memory.ReadBlock(buffer_pos, buffer.data(), buffer.size() * sizeof(s16));
+ const auto channel_count = in_params.channel_count;
+ std::vector<T> buffer(samples_processed * channel_count);
+ memory.ReadBlock(buffer_pos, buffer.data(), buffer.size() * sizeof(T));
+ if constexpr (std::is_floating_point_v<T>) {
+ for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
+ sample_buffer[mix_offset + i] = static_cast<s32>(buffer[i * channel_count + channel] *
+ std::numeric_limits<s16>::max());
+ }
+ } else if constexpr (sizeof(T) == 1) {
+ for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
+ sample_buffer[mix_offset + i] =
+ static_cast<s32>(static_cast<f32>(buffer[i * channel_count + channel] /
+ std::numeric_limits<s8>::max()) *
+ std::numeric_limits<s16>::max());
+ }
+ } else if constexpr (sizeof(T) == 2) {
for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
sample_buffer[mix_offset + i] = buffer[i * channel_count + channel];
}
+ } else {
+ for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
+ sample_buffer[mix_offset + i] =
+ static_cast<s32>(static_cast<f32>(buffer[i * channel_count + channel] /
+ std::numeric_limits<s32>::max()) *
+ std::numeric_limits<s16>::max());
+ }
}
return samples_processed;
}
s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state,
- s32 sample_count, [[maybe_unused]] s32 channel,
- std::size_t mix_offset) {
+ s32 sample_start_offset, s32 sample_end_offset, s32 sample_count,
+ [[maybe_unused]] s32 channel, std::size_t mix_offset) {
const auto& in_params = voice_info.GetInParams();
const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index];
if (wave_buffer.buffer_address == 0) {
@@ -1054,7 +1076,7 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s
if (wave_buffer.buffer_size == 0) {
return 0;
}
- if (wave_buffer.end_sample_offset < wave_buffer.start_sample_offset) {
+ if (sample_end_offset < sample_start_offset) {
return 0;
}
@@ -1079,10 +1101,9 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s
s32 coef1 = coeffs[idx * 2];
s32 coef2 = coeffs[idx * 2 + 1];
- const auto samples_remaining =
- (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) - dsp_state.offset;
+ const auto samples_remaining = (sample_end_offset - sample_start_offset) - dsp_state.offset;
const auto samples_processed = std::min(sample_count, samples_remaining);
- const auto sample_pos = wave_buffer.start_sample_offset + dsp_state.offset;
+ const auto sample_pos = dsp_state.offset + sample_start_offset;
const auto samples_remaining_in_frame = sample_pos % SAMPLES_PER_FRAME;
auto position_in_frame = ((sample_pos / SAMPLES_PER_FRAME) * NIBBLES_PER_SAMPLE) +
@@ -1157,12 +1178,14 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s
return samples_processed;
}
-s32* CommandGenerator::GetMixBuffer(std::size_t index) {
- return mix_buffer.data() + (index * worker_params.sample_count);
+std::span<s32> CommandGenerator::GetMixBuffer(std::size_t index) {
+ return std::span<s32>(mix_buffer.data() + (index * worker_params.sample_count),
+ worker_params.sample_count);
}
-const s32* CommandGenerator::GetMixBuffer(std::size_t index) const {
- return mix_buffer.data() + (index * worker_params.sample_count);
+std::span<const s32> CommandGenerator::GetMixBuffer(std::size_t index) const {
+ return std::span<const s32>(mix_buffer.data() + (index * worker_params.sample_count),
+ worker_params.sample_count);
}
std::size_t CommandGenerator::GetMixChannelBufferOffset(s32 channel) const {
@@ -1173,15 +1196,15 @@ std::size_t CommandGenerator::GetTotalMixBufferCount() const {
return worker_params.mix_buffer_count + AudioCommon::MAX_CHANNEL_COUNT;
}
-s32* CommandGenerator::GetChannelMixBuffer(s32 channel) {
+std::span<s32> CommandGenerator::GetChannelMixBuffer(s32 channel) {
return GetMixBuffer(worker_params.mix_buffer_count + channel);
}
-const s32* CommandGenerator::GetChannelMixBuffer(s32 channel) const {
+std::span<const s32> CommandGenerator::GetChannelMixBuffer(s32 channel) const {
return GetMixBuffer(worker_params.mix_buffer_count + channel);
}
-void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output,
+void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output,
VoiceState& dsp_state, s32 channel,
s32 target_sample_rate, s32 sample_count,
s32 node_id) {
@@ -1193,7 +1216,7 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
node_id, channel, in_params.sample_format, sample_count, in_params.sample_rate,
in_params.mix_id, in_params.splitter_info_id);
}
- ASSERT_OR_EXECUTE(output != nullptr, { return; });
+ ASSERT_OR_EXECUTE(output.data() != nullptr, { return; });
const auto resample_rate = static_cast<s32>(
static_cast<float>(in_params.sample_rate) / static_cast<float>(target_sample_rate) *
@@ -1210,9 +1233,9 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
}
std::size_t temp_mix_offset{};
- bool is_buffer_completed{false};
+ s32 samples_output{};
auto samples_remaining = sample_count;
- while (samples_remaining > 0 && !is_buffer_completed) {
+ while (samples_remaining > 0) {
const auto samples_to_output = std::min(samples_remaining, min_required_samples);
const auto samples_to_read = (samples_to_output * resample_rate + dsp_state.fraction) >> 15;
@@ -1229,24 +1252,53 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index];
// No more data can be read
if (!dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index]) {
- is_buffer_completed = true;
break;
}
if (in_params.sample_format == SampleFormat::Adpcm && dsp_state.offset == 0 &&
wave_buffer.context_address != 0 && wave_buffer.context_size != 0) {
- // TODO(ogniK): ADPCM loop context
+ memory.ReadBlock(wave_buffer.context_address, &dsp_state.context,
+ sizeof(ADPCMContext));
+ }
+
+ s32 samples_offset_start;
+ s32 samples_offset_end;
+ if (dsp_state.loop_count > 0 && wave_buffer.loop_start_sample != 0 &&
+ wave_buffer.loop_end_sample != 0 &&
+ wave_buffer.loop_start_sample <= wave_buffer.loop_end_sample) {
+ samples_offset_start = wave_buffer.loop_start_sample;
+ samples_offset_end = wave_buffer.loop_end_sample;
+ } else {
+ samples_offset_start = wave_buffer.start_sample_offset;
+ samples_offset_end = wave_buffer.end_sample_offset;
}
s32 samples_decoded{0};
switch (in_params.sample_format) {
+ case SampleFormat::Pcm8:
+ samples_decoded =
+ DecodePcm<s8>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
+ samples_to_read - samples_read, channel, temp_mix_offset);
+ break;
case SampleFormat::Pcm16:
- samples_decoded = DecodePcm16(voice_info, dsp_state, samples_to_read - samples_read,
- channel, temp_mix_offset);
+ samples_decoded =
+ DecodePcm<s16>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
+ samples_to_read - samples_read, channel, temp_mix_offset);
+ break;
+ case SampleFormat::Pcm32:
+ samples_decoded =
+ DecodePcm<s32>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
+ samples_to_read - samples_read, channel, temp_mix_offset);
+ break;
+ case SampleFormat::PcmFloat:
+ samples_decoded =
+ DecodePcm<f32>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
+ samples_to_read - samples_read, channel, temp_mix_offset);
break;
case SampleFormat::Adpcm:
- samples_decoded = DecodeAdpcm(voice_info, dsp_state, samples_to_read - samples_read,
- channel, temp_mix_offset);
+ samples_decoded =
+ DecodeAdpcm(voice_info, dsp_state, samples_offset_start, samples_offset_end,
+ samples_to_read - samples_read, channel, temp_mix_offset);
break;
default:
UNREACHABLE_MSG("Unimplemented sample format={}", in_params.sample_format);
@@ -1257,15 +1309,19 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
dsp_state.offset += samples_decoded;
dsp_state.played_sample_count += samples_decoded;
- if (dsp_state.offset >=
- (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) ||
+ if (dsp_state.offset >= (samples_offset_end - samples_offset_start) ||
samples_decoded == 0) {
// Reset our sample offset
dsp_state.offset = 0;
if (wave_buffer.is_looping) {
- if (samples_decoded == 0) {
+ dsp_state.loop_count++;
+ if (wave_buffer.loop_count > 0 &&
+ (dsp_state.loop_count > wave_buffer.loop_count || samples_decoded == 0)) {
// End of our buffer
- is_buffer_completed = true;
+ voice_info.SetWaveBufferCompleted(dsp_state, wave_buffer);
+ }
+
+ if (samples_decoded == 0) {
break;
}
@@ -1273,35 +1329,29 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
dsp_state.played_sample_count = 0;
}
} else {
-
// Update our wave buffer states
- dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index] = false;
- dsp_state.wave_buffer_consumed++;
- dsp_state.wave_buffer_index =
- (dsp_state.wave_buffer_index + 1) % AudioCommon::MAX_WAVE_BUFFERS;
- if (wave_buffer.end_of_stream) {
- dsp_state.played_sample_count = 0;
- }
+ voice_info.SetWaveBufferCompleted(dsp_state, wave_buffer);
}
}
}
if (in_params.behavior_flags.is_pitch_and_src_skipped.Value()) {
// No need to resample
- std::memcpy(output, sample_buffer.data(), samples_read * sizeof(s32));
+ std::memcpy(output.data() + samples_output, sample_buffer.data(),
+ samples_read * sizeof(s32));
} else {
std::fill(sample_buffer.begin() + temp_mix_offset,
sample_buffer.begin() + temp_mix_offset + (samples_to_read - samples_read),
0);
- AudioCore::Resample(output, sample_buffer.data(), resample_rate, dsp_state.fraction,
- samples_to_output);
+ AudioCore::Resample(output.data() + samples_output, sample_buffer.data(), resample_rate,
+ dsp_state.fraction, samples_to_output);
// Resample
for (std::size_t i = 0; i < AudioCommon::MAX_SAMPLE_HISTORY; i++) {
dsp_state.sample_history[i] = sample_buffer[samples_to_read + i];
}
}
- output += samples_to_output;
samples_remaining -= samples_to_output;
+ samples_output += samples_to_output;
}
}
diff --git a/src/audio_core/command_generator.h b/src/audio_core/command_generator.h
index 2ebb755b0..59a33ba76 100644
--- a/src/audio_core/command_generator.h
+++ b/src/audio_core/command_generator.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <span>
#include "audio_core/common.h"
#include "audio_core/voice_context.h"
#include "common/common_types.h"
@@ -41,10 +42,10 @@ public:
void PreCommand();
void PostCommand();
- [[nodiscard]] s32* GetChannelMixBuffer(s32 channel);
- [[nodiscard]] const s32* GetChannelMixBuffer(s32 channel) const;
- [[nodiscard]] s32* GetMixBuffer(std::size_t index);
- [[nodiscard]] const s32* GetMixBuffer(std::size_t index) const;
+ [[nodiscard]] std::span<s32> GetChannelMixBuffer(s32 channel);
+ [[nodiscard]] std::span<const s32> GetChannelMixBuffer(s32 channel) const;
+ [[nodiscard]] std::span<s32> GetMixBuffer(std::size_t index);
+ [[nodiscard]] std::span<const s32> GetMixBuffer(std::size_t index) const;
[[nodiscard]] std::size_t GetMixChannelBufferOffset(s32 channel) const;
[[nodiscard]] std::size_t GetTotalMixBufferCount() const;
@@ -77,21 +78,24 @@ private:
void GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* info, bool enabled);
[[nodiscard]] ServerSplitterDestinationData* GetDestinationData(s32 splitter_id, s32 index);
- s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, const s32* data,
- u32 sample_count, u32 write_offset, u32 write_count);
- s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data,
- u32 sample_count, u32 read_offset, u32 read_count);
+ s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples,
+ std::span<const s32> data, u32 sample_count, u32 write_offset,
+ u32 write_count);
+ s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples,
+ std::span<s32> out_data, u32 sample_count, u32 read_offset, u32 read_count);
void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
std::vector<u8>& work_buffer);
void UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, bool should_clear);
// DSP Code
- s32 DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count,
- s32 channel, std::size_t mix_offset);
- s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count,
- s32 channel, std::size_t mix_offset);
- void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output, VoiceState& dsp_state,
- s32 channel, s32 target_sample_rate, s32 sample_count, s32 node_id);
+ template <typename T>
+ s32 DecodePcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset,
+ s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset);
+ s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset,
+ s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset);
+ void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output,
+ VoiceState& dsp_state, s32 channel, s32 target_sample_rate,
+ s32 sample_count, s32 node_id);
AudioCommon::AudioRendererParameter& worker_params;
VoiceContext& voice_context;
diff --git a/src/audio_core/common.h b/src/audio_core/common.h
index fe546c55d..1ab537588 100644
--- a/src/audio_core/common.h
+++ b/src/audio_core/common.h
@@ -15,7 +15,7 @@ constexpr ResultCode ERR_INVALID_PARAMETERS{ErrorModule::Audio, 41};
constexpr ResultCode ERR_SPLITTER_SORT_FAILED{ErrorModule::Audio, 43};
} // namespace Audren
-constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '8');
+constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '9');
constexpr std::size_t MAX_MIX_BUFFERS = 24;
constexpr std::size_t MAX_BIQUAD_FILTERS = 2;
constexpr std::size_t MAX_CHANNEL_COUNT = 6;
diff --git a/src/audio_core/info_updater.cpp b/src/audio_core/info_updater.cpp
index 4a5b1b4ab..9b4ca1851 100644
--- a/src/audio_core/info_updater.cpp
+++ b/src/audio_core/info_updater.cpp
@@ -189,9 +189,6 @@ bool InfoUpdater::UpdateVoices(VoiceContext& voice_context,
if (voice_in_params.is_new) {
// Default our values for our voice
voice_info.Initialize();
- if (channel_count == 0 || channel_count > AudioCommon::MAX_CHANNEL_COUNT) {
- continue;
- }
// Zero out our voice states
for (std::size_t channel = 0; channel < channel_count; channel++) {
diff --git a/src/audio_core/sink_context.cpp b/src/audio_core/sink_context.cpp
index a69543696..cc55b290c 100644
--- a/src/audio_core/sink_context.cpp
+++ b/src/audio_core/sink_context.cpp
@@ -15,10 +15,17 @@ std::size_t SinkContext::GetCount() const {
void SinkContext::UpdateMainSink(const SinkInfo::InParams& in) {
ASSERT(in.type == SinkTypes::Device);
- has_downmix_coefs = in.device.down_matrix_enabled;
- if (has_downmix_coefs) {
+ if (in.device.down_matrix_enabled) {
downmix_coefficients = in.device.down_matrix_coef;
+ } else {
+ downmix_coefficients = {
+ 1.0f, // front
+ 0.707f, // center
+ 0.0f, // lfe
+ 0.707f, // back
+ };
}
+
in_use = in.in_use;
use_count = in.device.input_count;
buffers = in.device.input;
@@ -34,10 +41,6 @@ std::vector<u8> SinkContext::OutputBuffers() const {
return buffer_ret;
}
-bool SinkContext::HasDownMixingCoefficients() const {
- return has_downmix_coefs;
-}
-
const DownmixCoefficients& SinkContext::GetDownmixCoefficients() const {
return downmix_coefficients;
}
diff --git a/src/audio_core/sink_context.h b/src/audio_core/sink_context.h
index 9e2b69785..254961fe2 100644
--- a/src/audio_core/sink_context.h
+++ b/src/audio_core/sink_context.h
@@ -84,7 +84,6 @@ public:
[[nodiscard]] bool InUse() const;
[[nodiscard]] std::vector<u8> OutputBuffers() const;
- [[nodiscard]] bool HasDownMixingCoefficients() const;
[[nodiscard]] const DownmixCoefficients& GetDownmixCoefficients() const;
private:
@@ -92,7 +91,6 @@ private:
s32 use_count{};
std::array<u8, AudioCommon::MAX_CHANNEL_COUNT> buffers{};
std::size_t sink_count{};
- bool has_downmix_coefs{false};
DownmixCoefficients downmix_coefficients{};
};
} // namespace AudioCore
diff --git a/src/audio_core/voice_context.cpp b/src/audio_core/voice_context.cpp
index 867b8fc6b..d8c954b60 100644
--- a/src/audio_core/voice_context.cpp
+++ b/src/audio_core/voice_context.cpp
@@ -66,7 +66,7 @@ void ServerVoiceInfo::Initialize() {
in_params.last_volume = 0.0f;
in_params.biquad_filter.fill({});
in_params.wave_buffer_count = 0;
- in_params.wave_bufffer_head = 0;
+ in_params.wave_buffer_head = 0;
in_params.mix_id = AudioCommon::NO_MIX;
in_params.splitter_info_id = AudioCommon::NO_SPLITTER;
in_params.additional_params_address = 0;
@@ -75,7 +75,7 @@ void ServerVoiceInfo::Initialize() {
out_params.played_sample_count = 0;
out_params.wave_buffer_consumed = 0;
in_params.voice_drop_flag = false;
- in_params.buffer_mapped = false;
+ in_params.buffer_mapped = true;
in_params.wave_buffer_flush_request_count = 0;
in_params.was_biquad_filter_enabled.fill(false);
@@ -126,7 +126,7 @@ void ServerVoiceInfo::UpdateParameters(const VoiceInfo::InParams& voice_in,
in_params.volume = voice_in.volume;
in_params.biquad_filter = voice_in.biquad_filter;
in_params.wave_buffer_count = voice_in.wave_buffer_count;
- in_params.wave_bufffer_head = voice_in.wave_buffer_head;
+ in_params.wave_buffer_head = voice_in.wave_buffer_head;
if (behavior_info.IsFlushVoiceWaveBuffersSupported()) {
const auto in_request_count = in_params.wave_buffer_flush_request_count;
const auto voice_request_count = voice_in.wave_buffer_flush_request_count;
@@ -185,14 +185,16 @@ void ServerVoiceInfo::UpdateWaveBuffers(
wave_buffer.buffer_size = 0;
wave_buffer.context_address = 0;
wave_buffer.context_size = 0;
+ wave_buffer.loop_start_sample = 0;
+ wave_buffer.loop_end_sample = 0;
wave_buffer.sent_to_dsp = true;
}
// Mark all our wave buffers as invalid
for (std::size_t channel = 0; channel < static_cast<std::size_t>(in_params.channel_count);
channel++) {
- for (auto& is_valid : voice_states[channel]->is_wave_buffer_valid) {
- is_valid = false;
+ for (std::size_t i = 0; i < AudioCommon::MAX_WAVE_BUFFERS; ++i) {
+ voice_states[channel]->is_wave_buffer_valid[i] = false;
}
}
}
@@ -211,7 +213,7 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer,
const WaveBuffer& in_wave_buffer, SampleFormat sample_format,
bool is_buffer_valid,
[[maybe_unused]] BehaviorInfo& behavior_info) {
- if (!is_buffer_valid && out_wavebuffer.sent_to_dsp) {
+ if (!is_buffer_valid && out_wavebuffer.sent_to_dsp && out_wavebuffer.buffer_address != 0) {
out_wavebuffer.buffer_address = 0;
out_wavebuffer.buffer_size = 0;
}
@@ -219,11 +221,40 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer,
if (!in_wave_buffer.sent_to_server || !in_params.buffer_mapped) {
// Validate sample offset sizings
if (sample_format == SampleFormat::Pcm16) {
- const auto buffer_size = in_wave_buffer.buffer_size;
- if (in_wave_buffer.start_sample_offset < 0 || in_wave_buffer.end_sample_offset < 0 ||
- (buffer_size < (sizeof(s16) * in_wave_buffer.start_sample_offset)) ||
- (buffer_size < (sizeof(s16) * in_wave_buffer.end_sample_offset))) {
+ const s64 buffer_size = static_cast<s64>(in_wave_buffer.buffer_size);
+ const s64 start = sizeof(s16) * in_wave_buffer.start_sample_offset;
+ const s64 end = sizeof(s16) * in_wave_buffer.end_sample_offset;
+ if (0 > start || start > buffer_size || 0 > end || end > buffer_size) {
// TODO(ogniK): Write error info
+ LOG_ERROR(Audio,
+ "PCM16 wavebuffer has an invalid size. Buffer has size 0x{:08X}, but "
+ "offsets were "
+ "{:08X} - 0x{:08X}",
+ buffer_size, sizeof(s16) * in_wave_buffer.start_sample_offset,
+ sizeof(s16) * in_wave_buffer.end_sample_offset);
+ return;
+ }
+ } else if (sample_format == SampleFormat::Adpcm) {
+ const s64 buffer_size = static_cast<s64>(in_wave_buffer.buffer_size);
+ const s64 start_frames = in_wave_buffer.start_sample_offset / 14;
+ const s64 start_extra = in_wave_buffer.start_sample_offset % 14 == 0
+ ? 0
+ : (in_wave_buffer.start_sample_offset % 14) / 2 + 1 +
+ (in_wave_buffer.start_sample_offset % 2);
+ const s64 start = start_frames * 8 + start_extra;
+ const s64 end_frames = in_wave_buffer.end_sample_offset / 14;
+ const s64 end_extra = in_wave_buffer.end_sample_offset % 14 == 0
+ ? 0
+ : (in_wave_buffer.end_sample_offset % 14) / 2 + 1 +
+ (in_wave_buffer.end_sample_offset % 2);
+ const s64 end = end_frames * 8 + end_extra;
+ if (in_wave_buffer.start_sample_offset < 0 || start > buffer_size ||
+ in_wave_buffer.end_sample_offset < 0 || end > buffer_size) {
+ LOG_ERROR(Audio,
+ "ADPMC wavebuffer has an invalid size. Buffer has size 0x{:08X}, but "
+ "offsets were "
+ "{:08X} - 0x{:08X}",
+ in_wave_buffer.buffer_size, start, end);
return;
}
}
@@ -239,29 +270,34 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer,
out_wavebuffer.buffer_size = in_wave_buffer.buffer_size;
out_wavebuffer.context_address = in_wave_buffer.context_address;
out_wavebuffer.context_size = in_wave_buffer.context_size;
+ out_wavebuffer.loop_start_sample = in_wave_buffer.loop_start_sample;
+ out_wavebuffer.loop_end_sample = in_wave_buffer.loop_end_sample;
in_params.buffer_mapped =
in_wave_buffer.buffer_address != 0 && in_wave_buffer.buffer_size != 0;
// TODO(ogniK): Pool mapper attachment
// TODO(ogniK): IsAdpcmLoopContextBugFixed
+ if (sample_format == SampleFormat::Adpcm && in_wave_buffer.context_address != 0 &&
+ in_wave_buffer.context_size != 0 && behavior_info.IsAdpcmLoopContextBugFixed()) {
+ } else {
+ out_wavebuffer.context_address = 0;
+ out_wavebuffer.context_size = 0;
+ }
}
}
void ServerVoiceInfo::WriteOutStatus(
VoiceInfo::OutParams& voice_out, VoiceInfo::InParams& voice_in,
std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& voice_states) {
- if (voice_in.is_new) {
+ if (voice_in.is_new || in_params.is_new) {
in_params.is_new = true;
voice_out.wave_buffer_consumed = 0;
voice_out.played_sample_count = 0;
voice_out.voice_dropped = false;
- } else if (!in_params.is_new) {
- voice_out.wave_buffer_consumed = voice_states[0]->wave_buffer_consumed;
- voice_out.played_sample_count = voice_states[0]->played_sample_count;
- voice_out.voice_dropped = in_params.voice_drop_flag;
} else {
- voice_out.wave_buffer_consumed = 0;
- voice_out.played_sample_count = 0;
- voice_out.voice_dropped = false;
+ const auto& state = voice_states[0];
+ voice_out.wave_buffer_consumed = state->wave_buffer_consumed;
+ voice_out.played_sample_count = state->played_sample_count;
+ voice_out.voice_dropped = state->voice_dropped;
}
}
@@ -283,7 +319,8 @@ ServerVoiceInfo::OutParams& ServerVoiceInfo::GetOutParams() {
bool ServerVoiceInfo::ShouldSkip() const {
// TODO(ogniK): Handle unmapped wave buffers or parameters
- return !in_params.in_use || (in_params.wave_buffer_count == 0) || in_params.voice_drop_flag;
+ return !in_params.in_use || in_params.wave_buffer_count == 0 || !in_params.buffer_mapped ||
+ in_params.voice_drop_flag;
}
bool ServerVoiceInfo::UpdateForCommandGeneration(VoiceContext& voice_context) {
@@ -381,7 +418,7 @@ bool ServerVoiceInfo::UpdateParametersForCommandGeneration(
void ServerVoiceInfo::FlushWaveBuffers(
u8 flush_count, std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& dsp_voice_states,
s32 channel_count) {
- auto wave_head = in_params.wave_bufffer_head;
+ auto wave_head = in_params.wave_buffer_head;
for (u8 i = 0; i < flush_count; i++) {
in_params.wave_buffer[wave_head].sent_to_dsp = true;
@@ -401,6 +438,17 @@ bool ServerVoiceInfo::HasValidWaveBuffer(const VoiceState* state) const {
return std::find(valid_wb.begin(), valid_wb.end(), true) != valid_wb.end();
}
+void ServerVoiceInfo::SetWaveBufferCompleted(VoiceState& dsp_state,
+ const ServerWaveBuffer& wave_buffer) {
+ dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index] = false;
+ dsp_state.wave_buffer_consumed++;
+ dsp_state.wave_buffer_index = (dsp_state.wave_buffer_index + 1) % AudioCommon::MAX_WAVE_BUFFERS;
+ dsp_state.loop_count = 0;
+ if (wave_buffer.end_of_stream) {
+ dsp_state.played_sample_count = 0;
+ }
+}
+
VoiceContext::VoiceContext(std::size_t voice_count_) : voice_count{voice_count_} {
for (std::size_t i = 0; i < voice_count; i++) {
voice_channel_resources.emplace_back(static_cast<s32>(i));
diff --git a/src/audio_core/voice_context.h b/src/audio_core/voice_context.h
index 70359cadb..e1050897b 100644
--- a/src/audio_core/voice_context.h
+++ b/src/audio_core/voice_context.h
@@ -60,10 +60,12 @@ struct WaveBuffer {
u8 is_looping{};
u8 end_of_stream{};
u8 sent_to_server{};
- INSERT_PADDING_BYTES(5);
+ INSERT_PADDING_BYTES(1);
+ s32 loop_count{};
u64 context_address{};
u64 context_size{};
- INSERT_PADDING_BYTES(8);
+ u32 loop_start_sample{};
+ u32 loop_end_sample{};
};
static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer is an invalid size");
@@ -76,6 +78,9 @@ struct ServerWaveBuffer {
bool end_of_stream{};
VAddr context_address{};
std::size_t context_size{};
+ s32 loop_count{};
+ u32 loop_start_sample{};
+ u32 loop_end_sample{};
bool sent_to_dsp{true};
};
@@ -108,6 +113,7 @@ struct VoiceState {
u32 external_context_size;
bool is_external_context_used;
bool voice_dropped;
+ s32 loop_count;
};
class VoiceChannelResource {
@@ -206,7 +212,7 @@ public:
float last_volume{};
std::array<BiquadFilterParameter, AudioCommon::MAX_BIQUAD_FILTERS> biquad_filter{};
s32 wave_buffer_count{};
- s16 wave_bufffer_head{};
+ s16 wave_buffer_head{};
INSERT_PADDING_BYTES(2);
BehaviorFlags behavior_flags{};
VAddr additional_params_address{};
@@ -252,6 +258,7 @@ public:
void FlushWaveBuffers(u8 flush_count,
std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& dsp_voice_states,
s32 channel_count);
+ void SetWaveBufferCompleted(VoiceState& dsp_state, const ServerWaveBuffer& wave_buffer);
private:
std::vector<s16> stored_samples;
diff --git a/src/common/fs/file.cpp b/src/common/fs/file.cpp
index 077f34995..274f57659 100644
--- a/src/common/fs/file.cpp
+++ b/src/common/fs/file.cpp
@@ -306,9 +306,9 @@ bool IOFile::Flush() const {
errno = 0;
#ifdef _WIN32
- const auto flush_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0;
+ const auto flush_result = std::fflush(file) == 0;
#else
- const auto flush_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0;
+ const auto flush_result = std::fflush(file) == 0;
#endif
if (!flush_result) {
@@ -320,6 +320,28 @@ bool IOFile::Flush() const {
return flush_result;
}
+bool IOFile::Commit() const {
+ if (!IsOpen()) {
+ return false;
+ }
+
+ errno = 0;
+
+#ifdef _WIN32
+ const auto commit_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0;
+#else
+ const auto commit_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0;
+#endif
+
+ if (!commit_result) {
+ const auto ec = std::error_code{errno, std::generic_category()};
+ LOG_ERROR(Common_Filesystem, "Failed to commit the file at path={}, ec_message={}",
+ PathToUTF8String(file_path), ec.message());
+ }
+
+ return commit_result;
+}
+
bool IOFile::SetSize(u64 size) const {
if (!IsOpen()) {
return false;
@@ -347,6 +369,9 @@ u64 IOFile::GetSize() const {
return 0;
}
+ // Flush any unwritten buffered data into the file prior to retrieving the file size.
+ std::fflush(file);
+
std::error_code ec;
const auto file_size = fs::file_size(file_path, ec);
diff --git a/src/common/fs/file.h b/src/common/fs/file.h
index 588fe619d..2c4ab4332 100644
--- a/src/common/fs/file.h
+++ b/src/common/fs/file.h
@@ -396,13 +396,22 @@ public:
[[nodiscard]] size_t WriteString(std::span<const char> string) const;
/**
- * Attempts to flush any unwritten buffered data into the file and flush the file into the disk.
+ * Attempts to flush any unwritten buffered data into the file.
*
* @returns True if the flush was successful, false otherwise.
*/
bool Flush() const;
/**
+ * Attempts to commit the file into the disk.
+ * Note that this is an expensive operation as this forces the operating system to write
+ * the contents of the file associated with the file descriptor into the disk.
+ *
+ * @returns True if the commit was successful, false otherwise.
+ */
+ bool Commit() const;
+
+ /**
* Resizes the file to a given size.
* If the file is resized to a smaller size, the remainder of the file is discarded.
* If the file is resized to a larger size, the new area appears as if zero-filled.
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index b6fa4affb..61dddab3f 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -171,19 +171,22 @@ FileBackend::FileBackend(const std::filesystem::path& filename) {
FileBackend::~FileBackend() = default;
void FileBackend::Write(const Entry& entry) {
+ if (!file->IsOpen()) {
+ return;
+ }
+
using namespace Common::Literals;
- // prevent logs from going over the maximum size (in case its spamming and the user doesn't
- // know)
+ // Prevent logs from exceeding a set maximum size in the event that log entries are spammed.
constexpr std::size_t MAX_BYTES_WRITTEN = 100_MiB;
constexpr std::size_t MAX_BYTES_WRITTEN_EXTENDED = 1_GiB;
- if (!file->IsOpen()) {
- return;
- }
+ const bool write_limit_exceeded =
+ bytes_written > MAX_BYTES_WRITTEN_EXTENDED ||
+ (bytes_written > MAX_BYTES_WRITTEN && !Settings::values.extended_logging);
- if (Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN_EXTENDED) {
- return;
- } else if (!Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN) {
+ // Close the file after the write limit is exceeded.
+ if (write_limit_exceeded) {
+ file->Close();
return;
}
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 83b5b7676..b2b0dbe05 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -654,24 +654,19 @@ endif()
if (MSVC)
target_compile_options(core PRIVATE
- /we4018 # 'expression' : signed/unsigned mismatch
- /we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point)
- /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch
+ /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
+ /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data
+ /we4245 # 'conversion': conversion from 'type1' to 'type2', signed/unsigned mismatch
/we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
- /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
- /we4305 # 'context' : truncation from 'type1' to 'type2'
/we4456 # Declaration of 'identifier' hides previous local declaration
/we4457 # Declaration of 'identifier' hides function parameter
/we4458 # Declaration of 'identifier' hides class member
/we4459 # Declaration of 'identifier' hides global declaration
- /we4715 # 'function' : not all control paths return a value
)
else()
target_compile_options(core PRIVATE
-Werror=conversion
-Werror=ignored-qualifiers
- -Werror=implicit-fallthrough
- -Werror=sign-compare
-Werror=shadow
$<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess>
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index 53b8b7ca0..7c0950bb0 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -345,8 +345,10 @@ std::vector<Core::Memory::CheatEntry> PatchManager::CreateCheatList(
static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType type,
const Service::FileSystem::FileSystemController& fs_controller) {
const auto load_dir = fs_controller.GetModificationLoadRoot(title_id);
+ const auto sdmc_load_dir = fs_controller.GetSDMCModificationLoadRoot(title_id);
if ((type != ContentRecordType::Program && type != ContentRecordType::Data) ||
- load_dir == nullptr || load_dir->GetSize() <= 0) {
+ ((load_dir == nullptr || load_dir->GetSize() <= 0) &&
+ (sdmc_load_dir == nullptr || sdmc_load_dir->GetSize() <= 0))) {
return;
}
@@ -356,7 +358,10 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
}
const auto& disabled = Settings::values.disabled_addons[title_id];
- auto patch_dirs = load_dir->GetSubdirectories();
+ std::vector<VirtualDir> patch_dirs = load_dir->GetSubdirectories();
+ if (std::find(disabled.cbegin(), disabled.cend(), "SDMC") == disabled.cend()) {
+ patch_dirs.push_back(sdmc_load_dir);
+ }
std::sort(patch_dirs.begin(), patch_dirs.end(),
[](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); });
@@ -402,7 +407,7 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
}
VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, ContentRecordType type,
- VirtualFile update_raw) const {
+ VirtualFile update_raw, bool apply_layeredfs) const {
const auto log_string = fmt::format("Patching RomFS for title_id={:016X}, type={:02X}",
title_id, static_cast<u8>(type));
@@ -442,7 +447,9 @@ VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, Content
}
// LayeredFS
- ApplyLayeredFS(romfs, title_id, type, fs_controller);
+ if (apply_layeredfs) {
+ ApplyLayeredFS(romfs, title_id, type, fs_controller);
+ }
return romfs;
}
@@ -524,6 +531,15 @@ PatchManager::PatchVersionNames PatchManager::GetPatchVersionNames(VirtualFile u
}
}
+ // SDMC mod directory (RomFS LayeredFS)
+ const auto sdmc_mod_dir = fs_controller.GetSDMCModificationLoadRoot(title_id);
+ if (sdmc_mod_dir != nullptr && sdmc_mod_dir->GetSize() > 0 &&
+ IsDirValidAndNonEmpty(FindSubdirectoryCaseless(sdmc_mod_dir, "romfs"))) {
+ const auto mod_disabled =
+ std::find(disabled.begin(), disabled.end(), "SDMC") != disabled.end();
+ out.insert_or_assign(mod_disabled ? "[D] SDMC" : "SDMC", "LayeredFS");
+ }
+
// DLC
const auto dlc_entries =
content_provider.ListEntriesFilter(TitleType::AOC, ContentRecordType::Data);
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h
index fb1853035..3be871f35 100644
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -64,7 +64,8 @@ public:
// - LayeredFS
[[nodiscard]] VirtualFile PatchRomFS(VirtualFile base, u64 ivfc_offset,
ContentRecordType type = ContentRecordType::Program,
- VirtualFile update_raw = nullptr) const;
+ VirtualFile update_raw = nullptr,
+ bool apply_layeredfs = true) const;
// Returns a vector of pairs between patch names and patch versions.
// i.e. Update 3.2.2 will return {"Update", "3.2.2"}
diff --git a/src/core/file_sys/sdmc_factory.cpp b/src/core/file_sys/sdmc_factory.cpp
index cb56d8f2d..e5c72cd4d 100644
--- a/src/core/file_sys/sdmc_factory.cpp
+++ b/src/core/file_sys/sdmc_factory.cpp
@@ -12,23 +12,32 @@ namespace FileSys {
constexpr u64 SDMC_TOTAL_SIZE = 0x10000000000; // 1 TiB
-SDMCFactory::SDMCFactory(VirtualDir dir_)
- : dir(std::move(dir_)), contents(std::make_unique<RegisteredCache>(
- GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents/registered"),
- [](const VirtualFile& file, const NcaID& id) {
- return NAX{file, id}.GetDecrypted();
- })),
+SDMCFactory::SDMCFactory(VirtualDir sd_dir_, VirtualDir sd_mod_dir_)
+ : sd_dir(std::move(sd_dir_)), sd_mod_dir(std::move(sd_mod_dir_)),
+ contents(std::make_unique<RegisteredCache>(
+ GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents/registered"),
+ [](const VirtualFile& file, const NcaID& id) {
+ return NAX{file, id}.GetDecrypted();
+ })),
placeholder(std::make_unique<PlaceholderCache>(
- GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents/placehld"))) {}
+ GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents/placehld"))) {}
SDMCFactory::~SDMCFactory() = default;
ResultVal<VirtualDir> SDMCFactory::Open() const {
- return MakeResult<VirtualDir>(dir);
+ return MakeResult<VirtualDir>(sd_dir);
+}
+
+VirtualDir SDMCFactory::GetSDMCModificationLoadRoot(u64 title_id) const {
+ // LayeredFS doesn't work on updates and title id-less homebrew
+ if (title_id == 0 || (title_id & 0xFFF) == 0x800) {
+ return nullptr;
+ }
+ return GetOrCreateDirectoryRelative(sd_mod_dir, fmt::format("/{:016X}", title_id));
}
VirtualDir SDMCFactory::GetSDMCContentDirectory() const {
- return GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents");
+ return GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents");
}
RegisteredCache* SDMCFactory::GetSDMCContents() const {
@@ -40,11 +49,11 @@ PlaceholderCache* SDMCFactory::GetSDMCPlaceholder() const {
}
VirtualDir SDMCFactory::GetImageDirectory() const {
- return GetOrCreateDirectoryRelative(dir, "/Nintendo/Album");
+ return GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Album");
}
u64 SDMCFactory::GetSDMCFreeSpace() const {
- return GetSDMCTotalSpace() - dir->GetSize();
+ return GetSDMCTotalSpace() - sd_dir->GetSize();
}
u64 SDMCFactory::GetSDMCTotalSpace() const {
diff --git a/src/core/file_sys/sdmc_factory.h b/src/core/file_sys/sdmc_factory.h
index 2bb92ba93..3a3d11f3a 100644
--- a/src/core/file_sys/sdmc_factory.h
+++ b/src/core/file_sys/sdmc_factory.h
@@ -16,11 +16,12 @@ class PlaceholderCache;
/// File system interface to the SDCard archive
class SDMCFactory {
public:
- explicit SDMCFactory(VirtualDir dir);
+ explicit SDMCFactory(VirtualDir sd_dir_, VirtualDir sd_mod_dir_);
~SDMCFactory();
ResultVal<VirtualDir> Open() const;
+ VirtualDir GetSDMCModificationLoadRoot(u64 title_id) const;
VirtualDir GetSDMCContentDirectory() const;
RegisteredCache* GetSDMCContents() const;
@@ -32,7 +33,8 @@ public:
u64 GetSDMCTotalSpace() const;
private:
- VirtualDir dir;
+ VirtualDir sd_dir;
+ VirtualDir sd_mod_dir;
std::unique_ptr<RegisteredCache> contents;
std::unique_ptr<PlaceholderCache> placeholder;
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 61bda3786..ceff2532d 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -345,8 +345,12 @@ public:
explicit RequestParser(u32* command_buffer) : RequestHelperBase(command_buffer) {}
explicit RequestParser(Kernel::HLERequestContext& ctx) : RequestHelperBase(ctx) {
- ASSERT_MSG(ctx.GetDataPayloadOffset(), "context is incomplete");
- Skip(ctx.GetDataPayloadOffset(), false);
+ // TIPC requests do not have a data payload offset
+ if (!ctx.IsTipc()) {
+ ASSERT_MSG(ctx.GetDataPayloadOffset(), "context is incomplete");
+ Skip(ctx.GetDataPayloadOffset(), false);
+ }
+
// Skip the u64 command id, it's already stored in the context
static constexpr u32 CommandIdSize = 2;
Skip(CommandIdSize, false);
diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp
index fec704c65..dd945e058 100644
--- a/src/core/hle/service/aoc/aoc_u.cpp
+++ b/src/core/hle/service/aoc/aoc_u.cpp
@@ -117,7 +117,7 @@ AOC_U::AOC_U(Core::System& system_)
{7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"},
{8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"},
{9, nullptr, "GetAddOnContentLostErrorCode"},
- {10, nullptr, "GetAddOnContentListChangedEventWithProcessId"},
+ {10, &AOC_U::GetAddOnContentListChangedEventWithProcessId, "GetAddOnContentListChangedEventWithProcessId"},
{100, &AOC_U::CreateEcPurchasedEventManager, "CreateEcPurchasedEventManager"},
{101, &AOC_U::CreatePermanentEcPurchasedEventManager, "CreatePermanentEcPurchasedEventManager"},
{110, nullptr, "CreateContentsServiceManager"},
@@ -257,6 +257,14 @@ void AOC_U::GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx) {
rb.PushCopyObjects(aoc_change_event.GetReadableEvent());
}
+/// Stubbed handler for command 10. Nothing is read from the request; the reply carries
+/// ResultSuccess and the readable side of aoc_change_event as a copied object.
+void AOC_U::GetAddOnContentListChangedEventWithProcessId(Kernel::HLERequestContext& ctx) {
+    LOG_WARNING(Service_AOC, "(STUBBED) called");
+
+    IPC::ResponseBuilder response{ctx, 2, 1};
+    response.Push(ResultSuccess);
+    response.PushCopyObjects(aoc_change_event.GetReadableEvent());
+}
+
void AOC_U::CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_AOC, "(STUBBED) called");
diff --git a/src/core/hle/service/aoc/aoc_u.h b/src/core/hle/service/aoc/aoc_u.h
index 65095baa2..bb6ffb8eb 100644
--- a/src/core/hle/service/aoc/aoc_u.h
+++ b/src/core/hle/service/aoc/aoc_u.h
@@ -28,6 +28,7 @@ private:
void GetAddOnContentBaseId(Kernel::HLERequestContext& ctx);
void PrepareAddOnContent(Kernel::HLERequestContext& ctx);
void GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx);
+ void GetAddOnContentListChangedEventWithProcessId(Kernel::HLERequestContext& ctx);
void CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx);
void CreatePermanentEcPurchasedEventManager(Kernel::HLERequestContext& ctx);
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 800feba6e..b769fe959 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -96,7 +96,7 @@ private:
void RequestUpdateImpl(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "(STUBBED) called");
- std::vector<u8> output_params(ctx.GetWriteBufferSize());
+ std::vector<u8> output_params(ctx.GetWriteBufferSize(), 0);
auto result = renderer->UpdateAudioRenderer(ctx.ReadBuffer(), output_params);
if (result.IsSuccess()) {
@@ -110,17 +110,19 @@ private:
void Start(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_Audio, "(STUBBED) called");
- IPC::ResponseBuilder rb{ctx, 2};
+ const auto result = renderer->Start();
- rb.Push(ResultSuccess);
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(result);
}
void Stop(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_Audio, "(STUBBED) called");
- IPC::ResponseBuilder rb{ctx, 2};
+ const auto result = renderer->Stop();
- rb.Push(ResultSuccess);
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(result);
}
void QuerySystemEvent(Kernel::HLERequestContext& ctx) {
@@ -288,7 +290,7 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
- rb.Push<u32>(1);
+ rb.Push<u32>(2);
}
// Should be similar to QueryAudioDeviceOutputEvent
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 10e6f7a64..33a6dbbb6 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -253,7 +253,11 @@ void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
rb.Push<u32>(worker_buffer_sz);
}
-void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
+void HwOpus::GetWorkBufferSizeEx(Kernel::HLERequestContext& ctx) { // Handler for command 5 (GetWorkBufferSizeEx)
+ GetWorkBufferSize(ctx); // Forwards the request unchanged to the non-Ex handler
+}
+
+void HwOpus::OpenHardwareOpusDecoder(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto sample_rate = rp.Pop<u32>();
const auto channel_count = rp.Pop<u32>();
@@ -291,14 +295,47 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
system, OpusDecoderState{std::move(decoder), sample_rate, channel_count});
}
+/// Handler for command 4 (OpenHardwareOpusDecoderEx).
+///
+/// Pops the sample rate and channel count from the request, creates a libopus multistream
+/// decoder with a single stream for them, and replies with an IHardwareOpusDecoderManager
+/// interface that owns the decoder. Replies with ResultUnknown when the decoder cannot be
+/// created.
+void HwOpus::OpenHardwareOpusDecoderEx(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp{ctx};
+    const auto sample_rate = rp.Pop<u32>();
+    const auto channel_count = rp.Pop<u32>();
+
+    // Routine call tracing: a normal invocation is not a critical condition.
+    LOG_DEBUG(Audio, "called sample_rate={}, channel_count={}", sample_rate, channel_count);
+
+    ASSERT_MSG(sample_rate == 48000 || sample_rate == 24000 || sample_rate == 16000 ||
+                   sample_rate == 12000 || sample_rate == 8000,
+               "Invalid sample rate");
+    ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
+
+    // One stream in total; it is a coupled (stereo) stream only when two channels are requested.
+    const int num_stereo_streams = channel_count == 2 ? 1 : 0;
+    const auto mapping_table = CreateMappingTable(channel_count);
+
+    int error = 0;
+    OpusDecoderPtr decoder{
+        opus_multistream_decoder_create(sample_rate, static_cast<int>(channel_count), 1,
+                                        num_stereo_streams, mapping_table.data(), &error)};
+    if (error != OPUS_OK || decoder == nullptr) {
+        LOG_ERROR(Audio, "Failed to create Opus decoder (error={}).", error);
+        IPC::ResponseBuilder rb{ctx, 2};
+        // TODO(ogniK): Use correct error code
+        rb.Push(ResultUnknown);
+        return;
+    }
+
+    // Ownership of the decoder moves into the state held by the returned interface.
+    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+    rb.Push(ResultSuccess);
+    rb.PushIpcInterface<IHardwareOpusDecoderManager>(
+        system, OpusDecoderState{std::move(decoder), sample_rate, channel_count});
+}
+
HwOpus::HwOpus(Core::System& system_) : ServiceFramework{system_, "hwopus"} {
static const FunctionInfo functions[] = {
- {0, &HwOpus::OpenOpusDecoder, "OpenOpusDecoder"},
+ {0, &HwOpus::OpenHardwareOpusDecoder, "OpenHardwareOpusDecoder"},
{1, &HwOpus::GetWorkBufferSize, "GetWorkBufferSize"},
{2, nullptr, "OpenOpusDecoderForMultiStream"},
{3, nullptr, "GetWorkBufferSizeForMultiStream"},
- {4, nullptr, "OpenHardwareOpusDecoderEx"},
- {5, nullptr, "GetWorkBufferSizeEx"},
+ {4, &HwOpus::OpenHardwareOpusDecoderEx, "OpenHardwareOpusDecoderEx"},
+ {5, &HwOpus::GetWorkBufferSizeEx, "GetWorkBufferSizeEx"},
{6, nullptr, "OpenHardwareOpusDecoderForMultiStreamEx"},
{7, nullptr, "GetWorkBufferSizeForMultiStreamEx"},
};
diff --git a/src/core/hle/service/audio/hwopus.h b/src/core/hle/service/audio/hwopus.h
index 4f921f18e..b74824ff3 100644
--- a/src/core/hle/service/audio/hwopus.h
+++ b/src/core/hle/service/audio/hwopus.h
@@ -18,8 +18,10 @@ public:
~HwOpus() override;
private:
- void OpenOpusDecoder(Kernel::HLERequestContext& ctx);
+ void OpenHardwareOpusDecoder(Kernel::HLERequestContext& ctx);
+ void OpenHardwareOpusDecoderEx(Kernel::HLERequestContext& ctx);
void GetWorkBufferSize(Kernel::HLERequestContext& ctx);
+ void GetWorkBufferSizeEx(Kernel::HLERequestContext& ctx);
};
} // namespace Service::Audio
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index 3c16fe6c7..4a9b13e45 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -703,6 +703,16 @@ FileSys::VirtualDir FileSystemController::GetModificationLoadRoot(u64 title_id)
return bis_factory->GetModificationLoadRoot(title_id);
}
+/// Looks up the SD card mod load directory for the given title through the SDMC factory.
+/// Returns nullptr when no SDMC factory has been created.
+FileSys::VirtualDir FileSystemController::GetSDMCModificationLoadRoot(u64 title_id) const {
+    LOG_TRACE(Service_FS, "Opening SDMC mod load root for tid={:016X}", title_id);
+
+    if (sdmc_factory != nullptr) {
+        return sdmc_factory->GetSDMCModificationLoadRoot(title_id);
+    }
+    return nullptr;
+}
+
FileSys::VirtualDir FileSystemController::GetModificationDumpRoot(u64 title_id) const {
LOG_TRACE(Service_FS, "Opening mod dump root for tid={:016X}", title_id);
@@ -733,20 +743,23 @@ void FileSystemController::CreateFactories(FileSys::VfsFilesystem& vfs, bool ove
}
using YuzuPath = Common::FS::YuzuPath;
+ const auto sdmc_dir_path = Common::FS::GetYuzuPath(YuzuPath::SDMCDir);
+ const auto sdmc_load_dir_path = sdmc_dir_path / "atmosphere/contents";
const auto rw_mode = FileSys::Mode::ReadWrite;
auto nand_directory =
vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::NANDDir), rw_mode);
- auto sd_directory =
- vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::SDMCDir), rw_mode);
+ auto sd_directory = vfs.OpenDirectory(Common::FS::PathToUTF8String(sdmc_dir_path), rw_mode);
auto load_directory =
vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::LoadDir), FileSys::Mode::Read);
+ auto sd_load_directory =
+ vfs.OpenDirectory(Common::FS::PathToUTF8String(sdmc_load_dir_path), FileSys::Mode::Read);
auto dump_directory =
vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::DumpDir), rw_mode);
if (bis_factory == nullptr) {
- bis_factory =
- std::make_unique<FileSys::BISFactory>(nand_directory, load_directory, dump_directory);
+ bis_factory = std::make_unique<FileSys::BISFactory>(
+ nand_directory, std::move(load_directory), std::move(dump_directory));
system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::SysNAND,
bis_factory->GetSystemNANDContents());
system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::UserNAND,
@@ -759,7 +772,8 @@ void FileSystemController::CreateFactories(FileSys::VfsFilesystem& vfs, bool ove
}
if (sdmc_factory == nullptr) {
- sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory));
+ sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory),
+ std::move(sd_load_directory));
system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::SDMC,
sdmc_factory->GetSDMCContents());
}
diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h
index b6b1b9220..d387af3cb 100644
--- a/src/core/hle/service/filesystem/filesystem.h
+++ b/src/core/hle/service/filesystem/filesystem.h
@@ -115,6 +115,7 @@ public:
FileSys::VirtualDir GetContentDirectory(ContentStorageId id) const;
FileSys::VirtualDir GetImageDirectory(ImageDirectoryId id) const;
+ FileSys::VirtualDir GetSDMCModificationLoadRoot(u64 title_id) const;
FileSys::VirtualDir GetModificationLoadRoot(u64 title_id) const;
FileSys::VirtualDir GetModificationDumpRoot(u64 title_id) const;
diff --git a/src/core/hle/service/mii/manager.cpp b/src/core/hle/service/mii/manager.cpp
index 114aff31c..869d2763f 100644
--- a/src/core/hle/service/mii/manager.cpp
+++ b/src/core/hle/service/mii/manager.cpp
@@ -20,6 +20,7 @@ namespace {
constexpr ResultCode ERROR_CANNOT_FIND_ENTRY{ErrorModule::Mii, 4};
+constexpr std::size_t BaseMiiCount{2};
constexpr std::size_t DefaultMiiCount{RawData::DefaultMii.size()};
constexpr MiiStoreData::Name DefaultMiiName{u'y', u'u', u'z', u'u'};
@@ -415,7 +416,7 @@ u32 MiiManager::GetCount(SourceFlag source_flag) const {
count += 0;
}
if ((source_flag & SourceFlag::Default) != SourceFlag::None) {
- count += DefaultMiiCount;
+ count += (DefaultMiiCount - BaseMiiCount);
}
return static_cast<u32>(count);
}
@@ -445,7 +446,7 @@ ResultVal<std::vector<MiiInfoElement>> MiiManager::GetDefault(SourceFlag source_
return MakeResult(std::move(result));
}
- for (std::size_t index = 0; index < DefaultMiiCount; index++) {
+ for (std::size_t index = BaseMiiCount; index < DefaultMiiCount; index++) {
result.emplace_back(BuildDefault(index), Source::Default);
}
diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt
index 7c5763f9c..c4283a952 100644
--- a/src/input_common/CMakeLists.txt
+++ b/src/input_common/CMakeLists.txt
@@ -34,28 +34,17 @@ if (MSVC)
/W4
/WX
- # 'expression' : signed/unsigned mismatch
- /we4018
- # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point)
- /we4244
- # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch
- /we4245
- # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
- /we4254
- # 'var' : conversion from 'size_t' to 'type', possible loss of data
- /we4267
- # 'context' : truncation from 'type1' to 'type2'
- /we4305
+ /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
+ /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data
+ /we4245 # 'conversion': conversion from 'type1' to 'type2', signed/unsigned mismatch
+ /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
)
else()
target_compile_options(input_common PRIVATE
-Werror
-Werror=conversion
-Werror=ignored-qualifiers
- -Werror=implicit-fallthrough
- -Werror=reorder
-Werror=shadow
- -Werror=sign-compare
$<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
$<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
-Werror=unused-variable
diff --git a/src/input_common/analog_from_button.cpp b/src/input_common/analog_from_button.cpp
index 100138d11..2fafd077f 100755
--- a/src/input_common/analog_from_button.cpp
+++ b/src/input_common/analog_from_button.cpp
@@ -27,6 +27,7 @@ public:
down->SetCallback(callbacks);
left->SetCallback(callbacks);
right->SetCallback(callbacks);
+ modifier->SetCallback(callbacks);
}
bool IsAngleGreater(float old_angle, float new_angle) const {
diff --git a/src/input_common/gcadapter/gc_adapter.cpp b/src/input_common/gcadapter/gc_adapter.cpp
index 320f51ee6..a2f1bb67c 100644
--- a/src/input_common/gcadapter/gc_adapter.cpp
+++ b/src/input_common/gcadapter/gc_adapter.cpp
@@ -5,14 +5,7 @@
#include <chrono>
#include <thread>
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4200) // nonstandard extension used : zero-sized array in struct/union
-#endif
#include <libusb.h>
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
#include "common/logging/log.h"
#include "common/param_package.h"
diff --git a/src/input_common/udp/protocol.h b/src/input_common/udp/protocol.h
index a3d276697..1bdc9209e 100644
--- a/src/input_common/udp/protocol.h
+++ b/src/input_common/udp/protocol.h
@@ -8,14 +8,7 @@
#include <optional>
#include <type_traits>
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4701)
-#endif
#include <boost/crc.hpp>
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
#include "common/bit_field.h"
#include "common/swap.h"
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f9454bbaa..e4de55f4d 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -292,12 +292,12 @@ endif()
if (MSVC)
target_compile_options(video_core PRIVATE
- /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
+ /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
+ /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data
/we4456 # Declaration of 'identifier' hides previous local declaration
/we4457 # Declaration of 'identifier' hides function parameter
/we4458 # Declaration of 'identifier' hides class member
/we4459 # Declaration of 'identifier' hides global declaration
- /we4715 # 'function' : not all control paths return a value
)
else()
target_compile_options(video_core PRIVATE
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 9d726a6fb..cad7f902d 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -99,7 +99,7 @@ class BufferCache {
};
public:
- static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4_KiB;
+ static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_,
@@ -109,8 +109,6 @@ public:
void TickFrame();
- void RunGarbageCollector();
-
void WriteMemory(VAddr cpu_addr, u64 size);
void CachedWriteMemory(VAddr cpu_addr, u64 size);
@@ -197,6 +195,8 @@ private:
((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
}
+ void RunGarbageCollector();
+
void BindHostIndexBuffer();
void BindHostVertexBuffers();
@@ -416,8 +416,9 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
template <class P>
void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
- ForEachBufferInRange(cpu_addr, size,
- [&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); });
+ ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
+ DownloadBufferMemory(buffer, cpu_addr, size);
+ });
}
template <class P>
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index a3fda1094..8b86ad050 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
case ThiMethod::SetMethod1:
LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
static_cast<u32>(nvdec_thi_state.method_0));
- nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0),
- data);
+ nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data);
break;
default:
break;
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index d02dc6260..1b4bbc8ac 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) {
av_free(ptr);
}
-Codec::Codec(GPU& gpu_)
- : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)),
+Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
+ : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
Codec::~Codec() {
@@ -43,46 +43,48 @@ Codec::~Codec() {
avcodec_close(av_codec_ctx);
}
+/// Creates and opens the libavcodec decoder that matches current_codec.
+/// `initialized` is set only when avcodec_open2() succeeds; for codecs other than
+/// H264 and VP9 nothing is created and the function returns immediately.
+void Codec::Initialize() {
+    // Translate the NVDEC codec selection into the libavcodec decoder id.
+    AVCodecID codec_id{AV_CODEC_ID_NONE};
+    if (current_codec == NvdecCommon::VideoCodec::H264) {
+        codec_id = AV_CODEC_ID_H264;
+    } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
+        codec_id = AV_CODEC_ID_VP9;
+    } else {
+        return;
+    }
+
+    av_codec = avcodec_find_decoder(codec_id);
+    av_codec_ctx = avcodec_alloc_context3(av_codec);
+    av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
+
+    // TODO(ameerj): libavcodec gpu hw acceleration
+
+    if (avcodec_open2(av_codec_ctx, av_codec, nullptr) < 0) {
+        LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
+        avcodec_close(av_codec_ctx);
+        return;
+    }
+    initialized = true;
+}
+
void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
if (current_codec != codec) {
- LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));
current_codec = codec;
+ LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
}
}
-void Codec::StateWrite(u32 offset, u64 arguments) {
- u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64);
- std::memcpy(state_offset, &arguments, sizeof(u64));
-}
-
void Codec::Decode() {
- bool is_first_frame = false;
+ const bool is_first_frame = !initialized;
if (!initialized) {
- if (current_codec == NvdecCommon::VideoCodec::H264) {
- av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
- } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
- av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
- } else {
- LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec);
- return;
- }
-
- av_codec_ctx = avcodec_alloc_context3(av_codec);
- av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
-
- // TODO(ameerj): libavcodec gpu hw acceleration
-
- const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
- if (av_error < 0) {
- LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
- avcodec_close(av_codec_ctx);
- return;
- }
- initialized = true;
- is_first_frame = true;
+ Initialize();
}
- bool vp9_hidden_frame = false;
+ bool vp9_hidden_frame = false;
AVPacket packet{};
av_init_packet(&packet);
std::vector<u8> frame_data;
@@ -95,7 +97,7 @@ void Codec::Decode() {
}
packet.data = frame_data.data();
- packet.size = static_cast<int>(frame_data.size());
+ packet.size = static_cast<s32>(frame_data.size());
avcodec_send_packet(av_codec_ctx, &packet);
@@ -127,4 +129,21 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
return current_codec;
}
+/// Returns a human-readable name for current_codec, or "Unknown" for any value
+/// without a dedicated case. The returned views refer to string literals.
+std::string_view Codec::GetCurrentCodecName() const {
+    switch (current_codec) {
+    case NvdecCommon::VideoCodec::None:
+        return "None";
+    case NvdecCommon::VideoCodec::H264:
+        return "H264";
+    case NvdecCommon::VideoCodec::Vp8:
+        return "VP8";
+    case NvdecCommon::VideoCodec::H265:
+        return "H265";
+    case NvdecCommon::VideoCodec::Vp9:
+        return "VP9";
+    default:
+        return "Unknown";
+    }
+}
+
} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index 8a2a6c360..96c823c76 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -34,15 +34,15 @@ class VP9;
class Codec {
public:
- explicit Codec(GPU& gpu);
+ explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs);
~Codec();
+ /// Initialize the libavcodec decoder for the current codec; sets `initialized` on success
+ void Initialize();
+
/// Sets NVDEC video stream codec
void SetTargetCodec(NvdecCommon::VideoCodec codec);
- /// Populate NvdecRegisters state with argument value at the provided offset
- void StateWrite(u32 offset, u64 arguments);
-
/// Call decoders to construct headers, decode AVFrame with ffmpeg
void Decode();
@@ -51,6 +51,8 @@ public:
/// Returns the value of current_codec
[[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
+ /// Return name of the current codec
+ [[nodiscard]] std::string_view GetCurrentCodecName() const;
private:
bool initialized{};
@@ -60,10 +62,10 @@ private:
AVCodecContext* av_codec_ctx{nullptr};
GPU& gpu;
+ const NvdecCommon::NvdecRegisters& state;
std::unique_ptr<Decoder::H264> h264_decoder;
std::unique_ptr<Decoder::VP9> vp9_decoder;
- NvdecCommon::NvdecRegisters state{};
std::queue<AVFramePtr> av_frames{};
};
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
index fea6aed98..5fb6d45ee 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -45,134 +45,129 @@ H264::~H264() = default;
const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state,
bool is_first_frame) {
- H264DecoderContext context{};
+ H264DecoderContext context;
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
- const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff);
+ const s64 frame_number = context.h264_parameter_set.frame_number.Value();
if (!is_first_frame && frame_number != 0) {
- frame.resize(context.frame_data_size);
-
+ frame.resize(context.stream_len);
gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
- } else {
- /// Encode header
- H264BitWriter writer{};
- writer.WriteU(1, 24);
- writer.WriteU(0, 1);
- writer.WriteU(3, 2);
- writer.WriteU(7, 5);
- writer.WriteU(100, 8);
- writer.WriteU(0, 8);
- writer.WriteU(31, 8);
- writer.WriteUe(0);
- const auto chroma_format_idc =
- static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3);
- writer.WriteUe(chroma_format_idc);
- if (chroma_format_idc == 3) {
- writer.WriteBit(false);
- }
-
- writer.WriteUe(0);
- writer.WriteUe(0);
- writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
- writer.WriteBit(false); // Scaling matrix present flag
-
- const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3);
- writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf));
- writer.WriteUe(order_cnt_type);
- if (order_cnt_type == 0) {
- writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
- } else if (order_cnt_type == 1) {
- writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
-
- writer.WriteSe(0);
- writer.WriteSe(0);
- writer.WriteUe(0);
- }
-
- const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units /
- (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
+ return frame;
+ }
- writer.WriteUe(16);
+ // Encode header
+ H264BitWriter writer{};
+ writer.WriteU(1, 24);
+ writer.WriteU(0, 1);
+ writer.WriteU(3, 2);
+ writer.WriteU(7, 5);
+ writer.WriteU(100, 8);
+ writer.WriteU(0, 8);
+ writer.WriteU(31, 8);
+ writer.WriteUe(0);
+ const u32 chroma_format_idc =
+ static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value());
+ writer.WriteUe(chroma_format_idc);
+ if (chroma_format_idc == 3) {
writer.WriteBit(false);
- writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
- writer.WriteUe(pic_height - 1);
- writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
-
- if (!context.h264_parameter_set.frame_mbs_only_flag) {
- writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0);
- }
+ }
- writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0);
- writer.WriteBit(false); // Frame cropping flag
- writer.WriteBit(false); // VUI parameter present flag
+ writer.WriteUe(0);
+ writer.WriteUe(0);
+ writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
+ writer.WriteBit(false); // Scaling matrix present flag
- writer.End();
+ writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value()));
- // H264 PPS
- writer.WriteU(1, 24);
- writer.WriteU(0, 1);
- writer.WriteU(3, 2);
- writer.WriteU(8, 5);
+ const auto order_cnt_type =
+ static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value());
+ writer.WriteUe(order_cnt_type);
+ if (order_cnt_type == 0) {
+ writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4);
+ } else if (order_cnt_type == 1) {
+ writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
+ writer.WriteSe(0);
+ writer.WriteSe(0);
writer.WriteUe(0);
- writer.WriteUe(0);
+ }
- writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
- writer.WriteBit(false);
- writer.WriteUe(0);
- writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
- writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
- writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0);
- writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2);
- s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f);
- pic_init_qp = (pic_init_qp << 26) >> 26;
- writer.WriteSe(pic_init_qp);
- writer.WriteSe(0);
- s32 chroma_qp_index_offset =
- static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f);
- chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27;
+ const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
+ (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
+
+ writer.WriteUe(16);
+ writer.WriteBit(false);
+ writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
+ writer.WriteUe(pic_height - 1);
+ writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
- writer.WriteSe(chroma_qp_index_offset);
- writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0);
- writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0);
- writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0);
- writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
+ if (!context.h264_parameter_set.frame_mbs_only_flag) {
+ writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0);
+ }
+ writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0);
+ writer.WriteBit(false); // Frame cropping flag
+ writer.WriteBit(false); // VUI parameter present flag
+
+ writer.End();
+
+ // H264 PPS
+ writer.WriteU(1, 24);
+ writer.WriteU(0, 1);
+ writer.WriteU(3, 2);
+ writer.WriteU(8, 5);
+
+ writer.WriteUe(0);
+ writer.WriteUe(0);
+
+ writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
+ writer.WriteBit(false);
+ writer.WriteUe(0);
+ writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
+ writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
+ writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0);
+ writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2);
+ s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value());
+ writer.WriteSe(pic_init_qp);
+ writer.WriteSe(0);
+ s32 chroma_qp_index_offset =
+ static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value());
+
+ writer.WriteSe(chroma_qp_index_offset);
+ writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0);
+ writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0);
+ writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0);
+ writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
+
+ writer.WriteBit(true);
+
+ for (s32 index = 0; index < 6; index++) {
writer.WriteBit(true);
+ std::span<const u8> matrix{context.weight_scale};
+ writer.WriteScalingList(matrix, index * 16, 16);
+ }
- for (s32 index = 0; index < 6; index++) {
+ if (context.h264_parameter_set.transform_8x8_mode_flag) {
+ for (s32 index = 0; index < 2; index++) {
writer.WriteBit(true);
- const auto matrix_x4 =
- std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end());
- writer.WriteScalingList(matrix_x4, index * 16, 16);
- }
-
- if (context.h264_parameter_set.transform_8x8_mode_flag) {
- for (s32 index = 0; index < 2; index++) {
- writer.WriteBit(true);
- const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(),
- context.scaling_matrix_8.end());
-
- writer.WriteScalingList(matrix_x8, index * 64, 64);
- }
+ std::span<const u8> matrix{context.weight_scale_8x8};
+ writer.WriteScalingList(matrix, index * 64, 64);
}
+ }
- s32 chroma_qp_index_offset2 =
- static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f);
- chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27;
+ s32 chroma_qp_index_offset2 =
+ static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value());
- writer.WriteSe(chroma_qp_index_offset2);
+ writer.WriteSe(chroma_qp_index_offset2);
- writer.End();
+ writer.End();
- const auto& encoded_header = writer.GetByteArray();
- frame.resize(encoded_header.size() + context.frame_data_size);
- std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
+ const auto& encoded_header = writer.GetByteArray();
+ frame.resize(encoded_header.size() + context.stream_len);
+ std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
- gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
- frame.data() + encoded_header.size(),
- context.frame_data_size);
- }
+ gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
+ frame.data() + encoded_header.size(), context.stream_len);
return frame;
}
@@ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) {
WriteBits(state ? 1 : 0, 1);
}
-void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) {
+void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
std::vector<u8> scan(count);
if (count == 16) {
std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h
index 0f3a1d9f3..bfe84a472 100644
--- a/src/video_core/command_classes/codecs/h264.h
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -20,7 +20,9 @@
#pragma once
+#include <span>
#include <vector>
+#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/command_classes/nvdec_common.h"
@@ -48,7 +50,7 @@ public:
/// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
/// Writes the scaling matrices of the stream
- void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
+ void WriteScalingList(std::span<const u8> list, s32 start, s32 count);
/// Return the bitstream as a vector.
[[nodiscard]] std::vector<u8>& GetByteArray();
@@ -78,40 +80,110 @@ public:
const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
private:
+ std::vector<u8> frame;
+ GPU& gpu;
+
struct H264ParameterSet {
- u32 log2_max_pic_order_cnt{};
- u32 delta_pic_order_always_zero_flag{};
- u32 frame_mbs_only_flag{};
- u32 pic_width_in_mbs{};
- u32 pic_height_in_map_units{};
- INSERT_PADDING_WORDS(1);
- u32 entropy_coding_mode_flag{};
- u32 bottom_field_pic_order_flag{};
- u32 num_refidx_l0_default_active{};
- u32 num_refidx_l1_default_active{};
- u32 deblocking_filter_control_flag{};
- u32 redundant_pic_count_flag{};
- u32 transform_8x8_mode_flag{};
- INSERT_PADDING_WORDS(9);
- u64 flags{};
- u32 frame_number{};
- u32 frame_number2{};
+ s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
+ s32 delta_pic_order_always_zero_flag; ///< 0x04
+ s32 frame_mbs_only_flag; ///< 0x08
+ u32 pic_width_in_mbs; ///< 0x0C
+ u32 frame_height_in_map_units; ///< 0x10
+ union { ///< 0x14
+ BitField<0, 2, u32> tile_format;
+ BitField<2, 3, u32> gob_height;
+ };
+ u32 entropy_coding_mode_flag; ///< 0x18
+ s32 pic_order_present_flag; ///< 0x1C
+ s32 num_refidx_l0_default_active; ///< 0x20
+ s32 num_refidx_l1_default_active; ///< 0x24
+ s32 deblocking_filter_control_present_flag; ///< 0x28
+ s32 redundant_pic_cnt_present_flag; ///< 0x2C
+ u32 transform_8x8_mode_flag; ///< 0x30
+ u32 pitch_luma; ///< 0x34
+ u32 pitch_chroma; ///< 0x38
+ u32 luma_top_offset; ///< 0x3C
+ u32 luma_bot_offset; ///< 0x40
+ u32 luma_frame_offset; ///< 0x44
+ u32 chroma_top_offset; ///< 0x48
+ u32 chroma_bot_offset; ///< 0x4C
+ u32 chroma_frame_offset; ///< 0x50
+ u32 hist_buffer_size; ///< 0x54
+ union { ///< 0x58
+ union {
+ BitField<0, 1, u64> mbaff_frame;
+ BitField<1, 1, u64> direct_8x8_inference;
+ BitField<2, 1, u64> weighted_pred;
+ BitField<3, 1, u64> constrained_intra_pred;
+ BitField<4, 1, u64> ref_pic;
+ BitField<5, 1, u64> field_pic;
+ BitField<6, 1, u64> bottom_field;
+ BitField<7, 1, u64> second_field;
+ } flags;
+ BitField<8, 4, u64> log2_max_frame_num_minus4;
+ BitField<12, 2, u64> chroma_format_idc;
+ BitField<14, 2, u64> pic_order_cnt_type;
+ BitField<16, 6, s64> pic_init_qp_minus26;
+ BitField<22, 5, s64> chroma_qp_index_offset;
+ BitField<27, 5, s64> second_chroma_qp_index_offset;
+ BitField<32, 2, u64> weighted_bipred_idc;
+ BitField<34, 7, u64> curr_pic_idx;
+ BitField<41, 5, u64> curr_col_idx;
+ BitField<46, 16, u64> frame_number;
+ BitField<62, 1, u64> frame_surfaces;
+ BitField<63, 1, u64> output_memory_layout;
+ };
};
- static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size");
+ static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");
struct H264DecoderContext {
- INSERT_PADDING_BYTES(0x48);
- u32 frame_data_size{};
- INSERT_PADDING_BYTES(0xc);
- H264ParameterSet h264_parameter_set{};
- INSERT_PADDING_BYTES(0x100);
- std::array<u8, 0x60> scaling_matrix_4;
- std::array<u8, 0x80> scaling_matrix_8;
+ INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000
+ u32 stream_len; ///< 0x0048
+ INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C
+ H264ParameterSet h264_parameter_set; ///< 0x0058
+ INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8
+ std::array<u8, 0x60> weight_scale; ///< 0x01C0
+ std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
};
- static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size");
-
- std::vector<u8> frame;
- GPU& gpu;
+ static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size");
+
+#define ASSERT_POSITION(field_name, position) \
+ static_assert(offsetof(H264ParameterSet, field_name) == position, \
+ "Field " #field_name " has invalid position")
+
+ ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00);
+ ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04);
+ ASSERT_POSITION(frame_mbs_only_flag, 0x08);
+ ASSERT_POSITION(pic_width_in_mbs, 0x0C);
+ ASSERT_POSITION(frame_height_in_map_units, 0x10);
+ ASSERT_POSITION(tile_format, 0x14);
+ ASSERT_POSITION(entropy_coding_mode_flag, 0x18);
+ ASSERT_POSITION(pic_order_present_flag, 0x1C);
+ ASSERT_POSITION(num_refidx_l0_default_active, 0x20);
+ ASSERT_POSITION(num_refidx_l1_default_active, 0x24);
+ ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28);
+ ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C);
+ ASSERT_POSITION(transform_8x8_mode_flag, 0x30);
+ ASSERT_POSITION(pitch_luma, 0x34);
+ ASSERT_POSITION(pitch_chroma, 0x38);
+ ASSERT_POSITION(luma_top_offset, 0x3C);
+ ASSERT_POSITION(luma_bot_offset, 0x40);
+ ASSERT_POSITION(luma_frame_offset, 0x44);
+ ASSERT_POSITION(chroma_top_offset, 0x48);
+ ASSERT_POSITION(chroma_bot_offset, 0x4C);
+ ASSERT_POSITION(chroma_frame_offset, 0x50);
+ ASSERT_POSITION(hist_buffer_size, 0x54);
+ ASSERT_POSITION(flags, 0x58);
+#undef ASSERT_POSITION
+
+#define ASSERT_POSITION(field_name, position) \
+ static_assert(offsetof(H264DecoderContext, field_name) == position, \
+ "Field " #field_name " has invalid position")
+
+ ASSERT_POSITION(stream_len, 0x48);
+ ASSERT_POSITION(h264_parameter_set, 0x58);
+ ASSERT_POSITION(weight_scale, 0x1C0);
+#undef ASSERT_POSITION
};
} // namespace Decoder
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
index 29bb31418..902bc2a98 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
}
Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
- PictureInfo picture_info{};
+ PictureInfo picture_info;
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
Vp9PictureInfo vp9_info = picture_info.Convert();
@@ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state)
}
void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
- EntropyProbs entropy{};
+ EntropyProbs entropy;
gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
entropy.Convert(dst);
}
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
index 139501a1c..2da14f3ca 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -15,10 +15,10 @@ class GPU;
namespace Decoder {
struct Vp9FrameDimensions {
- s16 width{};
- s16 height{};
- s16 luma_pitch{};
- s16 chroma_pitch{};
+ s16 width;
+ s16 height;
+ s16 luma_pitch;
+ s16 chroma_pitch;
};
static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
@@ -49,87 +49,87 @@ enum class TxMode {
};
struct Segmentation {
- u8 enabled{};
- u8 update_map{};
- u8 temporal_update{};
- u8 abs_delta{};
- std::array<u32, 8> feature_mask{};
- std::array<std::array<s16, 4>, 8> feature_data{};
+ u8 enabled;
+ u8 update_map;
+ u8 temporal_update;
+ u8 abs_delta;
+ std::array<u32, 8> feature_mask;
+ std::array<std::array<s16, 4>, 8> feature_data;
};
static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
struct LoopFilter {
- u8 mode_ref_delta_enabled{};
- std::array<s8, 4> ref_deltas{};
- std::array<s8, 2> mode_deltas{};
+ u8 mode_ref_delta_enabled;
+ std::array<s8, 4> ref_deltas;
+ std::array<s8, 2> mode_deltas;
};
static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
struct Vp9EntropyProbs {
- std::array<u8, 36> y_mode_prob{};
- std::array<u8, 64> partition_prob{};
- std::array<u8, 1728> coef_probs{};
- std::array<u8, 8> switchable_interp_prob{};
- std::array<u8, 28> inter_mode_prob{};
- std::array<u8, 4> intra_inter_prob{};
- std::array<u8, 5> comp_inter_prob{};
- std::array<u8, 10> single_ref_prob{};
- std::array<u8, 5> comp_ref_prob{};
- std::array<u8, 6> tx_32x32_prob{};
- std::array<u8, 4> tx_16x16_prob{};
- std::array<u8, 2> tx_8x8_prob{};
- std::array<u8, 3> skip_probs{};
- std::array<u8, 3> joints{};
- std::array<u8, 2> sign{};
- std::array<u8, 20> classes{};
- std::array<u8, 2> class_0{};
- std::array<u8, 20> prob_bits{};
- std::array<u8, 12> class_0_fr{};
- std::array<u8, 6> fr{};
- std::array<u8, 2> class_0_hp{};
- std::array<u8, 2> high_precision{};
+ std::array<u8, 36> y_mode_prob; ///< 0x0000
+ std::array<u8, 64> partition_prob; ///< 0x0024
+ std::array<u8, 1728> coef_probs; ///< 0x0064
+ std::array<u8, 8> switchable_interp_prob; ///< 0x0724
+ std::array<u8, 28> inter_mode_prob; ///< 0x072C
+ std::array<u8, 4> intra_inter_prob; ///< 0x0748
+ std::array<u8, 5> comp_inter_prob; ///< 0x074C
+ std::array<u8, 10> single_ref_prob; ///< 0x0751
+ std::array<u8, 5> comp_ref_prob; ///< 0x075B
+ std::array<u8, 6> tx_32x32_prob; ///< 0x0760
+ std::array<u8, 4> tx_16x16_prob; ///< 0x0766
+ std::array<u8, 2> tx_8x8_prob; ///< 0x076A
+ std::array<u8, 3> skip_probs; ///< 0x076C
+ std::array<u8, 3> joints; ///< 0x076F
+ std::array<u8, 2> sign; ///< 0x0772
+ std::array<u8, 20> classes; ///< 0x0774
+ std::array<u8, 2> class_0; ///< 0x0788
+ std::array<u8, 20> prob_bits; ///< 0x078A
+ std::array<u8, 12> class_0_fr; ///< 0x079E
+ std::array<u8, 6> fr; ///< 0x07AA
+ std::array<u8, 2> class_0_hp; ///< 0x07B0
+ std::array<u8, 2> high_precision; ///< 0x07B2
};
static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");
struct Vp9PictureInfo {
- bool is_key_frame{};
- bool intra_only{};
- bool last_frame_was_key{};
- bool frame_size_changed{};
- bool error_resilient_mode{};
- bool last_frame_shown{};
- bool show_frame{};
- std::array<s8, 4> ref_frame_sign_bias{};
- s32 base_q_index{};
- s32 y_dc_delta_q{};
- s32 uv_dc_delta_q{};
- s32 uv_ac_delta_q{};
- bool lossless{};
- s32 transform_mode{};
- bool allow_high_precision_mv{};
- s32 interp_filter{};
- s32 reference_mode{};
- s8 comp_fixed_ref{};
- std::array<s8, 2> comp_var_ref{};
- s32 log2_tile_cols{};
- s32 log2_tile_rows{};
- bool segment_enabled{};
- bool segment_map_update{};
- bool segment_map_temporal_update{};
- s32 segment_abs_delta{};
- std::array<u32, 8> segment_feature_enable{};
- std::array<std::array<s16, 4>, 8> segment_feature_data{};
- bool mode_ref_delta_enabled{};
- bool use_prev_in_find_mv_refs{};
- std::array<s8, 4> ref_deltas{};
- std::array<s8, 2> mode_deltas{};
- Vp9EntropyProbs entropy{};
- Vp9FrameDimensions frame_size{};
- u8 first_level{};
- u8 sharpness_level{};
- u32 bitstream_size{};
- std::array<u64, 4> frame_offsets{};
- std::array<bool, 4> refresh_frame{};
+ bool is_key_frame;
+ bool intra_only;
+ bool last_frame_was_key;
+ bool frame_size_changed;
+ bool error_resilient_mode;
+ bool last_frame_shown;
+ bool show_frame;
+ std::array<s8, 4> ref_frame_sign_bias;
+ s32 base_q_index;
+ s32 y_dc_delta_q;
+ s32 uv_dc_delta_q;
+ s32 uv_ac_delta_q;
+ bool lossless;
+ s32 transform_mode;
+ bool allow_high_precision_mv;
+ s32 interp_filter;
+ s32 reference_mode;
+ s8 comp_fixed_ref;
+ std::array<s8, 2> comp_var_ref;
+ s32 log2_tile_cols;
+ s32 log2_tile_rows;
+ bool segment_enabled;
+ bool segment_map_update;
+ bool segment_map_temporal_update;
+ s32 segment_abs_delta;
+ std::array<u32, 8> segment_feature_enable;
+ std::array<std::array<s16, 4>, 8> segment_feature_data;
+ bool mode_ref_delta_enabled;
+ bool use_prev_in_find_mv_refs;
+ std::array<s8, 4> ref_deltas;
+ std::array<s8, 2> mode_deltas;
+ Vp9EntropyProbs entropy;
+ Vp9FrameDimensions frame_size;
+ u8 first_level;
+ u8 sharpness_level;
+ u32 bitstream_size;
+ std::array<u64, 4> frame_offsets;
+ std::array<bool, 4> refresh_frame;
};
struct Vp9FrameContainer {
@@ -138,35 +138,35 @@ struct Vp9FrameContainer {
};
struct PictureInfo {
- INSERT_PADDING_WORDS(12);
- u32 bitstream_size{};
- INSERT_PADDING_WORDS(5);
- Vp9FrameDimensions last_frame_size{};
- Vp9FrameDimensions golden_frame_size{};
- Vp9FrameDimensions alt_frame_size{};
- Vp9FrameDimensions current_frame_size{};
- u32 vp9_flags{};
- std::array<s8, 4> ref_frame_sign_bias{};
- u8 first_level{};
- u8 sharpness_level{};
- u8 base_q_index{};
- u8 y_dc_delta_q{};
- u8 uv_ac_delta_q{};
- u8 uv_dc_delta_q{};
- u8 lossless{};
- u8 tx_mode{};
- u8 allow_high_precision_mv{};
- u8 interp_filter{};
- u8 reference_mode{};
- s8 comp_fixed_ref{};
- std::array<s8, 2> comp_var_ref{};
- u8 log2_tile_cols{};
- u8 log2_tile_rows{};
- Segmentation segmentation{};
- LoopFilter loop_filter{};
- INSERT_PADDING_BYTES(5);
- u32 surface_params{};
- INSERT_PADDING_WORDS(3);
+ INSERT_PADDING_WORDS_NOINIT(12); ///< 0x00
+ u32 bitstream_size; ///< 0x30
+ INSERT_PADDING_WORDS_NOINIT(5); ///< 0x34
+ Vp9FrameDimensions last_frame_size; ///< 0x48
+ Vp9FrameDimensions golden_frame_size; ///< 0x50
+ Vp9FrameDimensions alt_frame_size; ///< 0x58
+ Vp9FrameDimensions current_frame_size; ///< 0x60
+ u32 vp9_flags; ///< 0x68
+ std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C
+ u8 first_level; ///< 0x70
+ u8 sharpness_level; ///< 0x71
+ u8 base_q_index; ///< 0x72
+ u8 y_dc_delta_q; ///< 0x73
+ u8 uv_ac_delta_q; ///< 0x74
+ u8 uv_dc_delta_q; ///< 0x75
+ u8 lossless; ///< 0x76
+ u8 tx_mode; ///< 0x77
+ u8 allow_high_precision_mv; ///< 0x78
+ u8 interp_filter; ///< 0x79
+ u8 reference_mode; ///< 0x7A
+ s8 comp_fixed_ref; ///< 0x7B
+ std::array<s8, 2> comp_var_ref; ///< 0x7C
+ u8 log2_tile_cols; ///< 0x7E
+ u8 log2_tile_rows; ///< 0x7F
+ Segmentation segmentation; ///< 0x80
+ LoopFilter loop_filter; ///< 0xE4
+ INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB
+ u32 surface_params; ///< 0xF0
+ INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4
[[nodiscard]] Vp9PictureInfo Convert() const {
return {
@@ -176,6 +176,7 @@ struct PictureInfo {
.frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
.error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
.last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
+ .show_frame = false,
.ref_frame_sign_bias = ref_frame_sign_bias,
.base_q_index = base_q_index,
.y_dc_delta_q = y_dc_delta_q,
@@ -204,45 +205,48 @@ struct PictureInfo {
!(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
.ref_deltas = loop_filter.ref_deltas,
.mode_deltas = loop_filter.mode_deltas,
+ .entropy{},
.frame_size = current_frame_size,
.first_level = first_level,
.sharpness_level = sharpness_level,
.bitstream_size = bitstream_size,
+ .frame_offsets{},
+ .refresh_frame{},
};
}
};
static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
struct EntropyProbs {
- INSERT_PADDING_BYTES(1024);
- std::array<u8, 28> inter_mode_prob{};
- std::array<u8, 4> intra_inter_prob{};
- INSERT_PADDING_BYTES(80);
- std::array<u8, 2> tx_8x8_prob{};
- std::array<u8, 4> tx_16x16_prob{};
- std::array<u8, 6> tx_32x32_prob{};
- std::array<u8, 4> y_mode_prob_e8{};
- std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{};
- INSERT_PADDING_BYTES(64);
- std::array<u8, 64> partition_prob{};
- INSERT_PADDING_BYTES(10);
- std::array<u8, 8> switchable_interp_prob{};
- std::array<u8, 5> comp_inter_prob{};
- std::array<u8, 3> skip_probs{};
- INSERT_PADDING_BYTES(1);
- std::array<u8, 3> joints{};
- std::array<u8, 2> sign{};
- std::array<u8, 2> class_0{};
- std::array<u8, 6> fr{};
- std::array<u8, 2> class_0_hp{};
- std::array<u8, 2> high_precision{};
- std::array<u8, 20> classes{};
- std::array<u8, 12> class_0_fr{};
- std::array<u8, 20> pred_bits{};
- std::array<u8, 10> single_ref_prob{};
- std::array<u8, 5> comp_ref_prob{};
- INSERT_PADDING_BYTES(17);
- std::array<u8, 2304> coef_probs{};
+ INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000
+ std::array<u8, 28> inter_mode_prob; ///< 0x0400
+ std::array<u8, 4> intra_inter_prob; ///< 0x041C
+ INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420
+ std::array<u8, 2> tx_8x8_prob; ///< 0x0470
+ std::array<u8, 4> tx_16x16_prob; ///< 0x0472
+ std::array<u8, 6> tx_32x32_prob; ///< 0x0476
+ std::array<u8, 4> y_mode_prob_e8; ///< 0x047C
+ std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480
+ INSERT_PADDING_BYTES_NOINIT(64); ///< 0x04A0
+ std::array<u8, 64> partition_prob; ///< 0x04E0
+ INSERT_PADDING_BYTES_NOINIT(10); ///< 0x0520
+ std::array<u8, 8> switchable_interp_prob; ///< 0x052A
+ std::array<u8, 5> comp_inter_prob; ///< 0x0532
+ std::array<u8, 3> skip_probs; ///< 0x0537
+ INSERT_PADDING_BYTES_NOINIT(1); ///< 0x053A
+ std::array<u8, 3> joints; ///< 0x053B
+ std::array<u8, 2> sign; ///< 0x053E
+ std::array<u8, 2> class_0; ///< 0x0540
+ std::array<u8, 6> fr; ///< 0x0542
+ std::array<u8, 2> class_0_hp; ///< 0x0548
+ std::array<u8, 2> high_precision; ///< 0x054A
+ std::array<u8, 20> classes; ///< 0x054C
+ std::array<u8, 12> class_0_fr; ///< 0x0560
+ std::array<u8, 20> pred_bits; ///< 0x056C
+ std::array<u8, 10> single_ref_prob; ///< 0x0580
+ std::array<u8, 5> comp_ref_prob; ///< 0x058A
+ INSERT_PADDING_BYTES_NOINIT(17); ///< 0x058F
+ std::array<u8, 2304> coef_probs; ///< 0x05A0
void Convert(Vp9EntropyProbs& fc) {
fc.inter_mode_prob = inter_mode_prob;
@@ -293,10 +297,45 @@ struct RefPoolElement {
};
struct FrameContexts {
- s64 from{};
- bool adapted{};
- Vp9EntropyProbs probs{};
+ s64 from;
+ bool adapted;
+ Vp9EntropyProbs probs;
};
+#define ASSERT_POSITION(field_name, position) \
+ static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \
+ "Field " #field_name " has invalid position")
+
+ASSERT_POSITION(partition_prob, 0x0024);
+ASSERT_POSITION(switchable_interp_prob, 0x0724);
+ASSERT_POSITION(sign, 0x0772);
+ASSERT_POSITION(class_0_fr, 0x079E);
+ASSERT_POSITION(high_precision, 0x07B2);
+#undef ASSERT_POSITION
+
+#define ASSERT_POSITION(field_name, position) \
+ static_assert(offsetof(PictureInfo, field_name) == position, \
+ "Field " #field_name " has invalid position")
+
+ASSERT_POSITION(bitstream_size, 0x30);
+ASSERT_POSITION(last_frame_size, 0x48);
+ASSERT_POSITION(first_level, 0x70);
+ASSERT_POSITION(segmentation, 0x80);
+ASSERT_POSITION(loop_filter, 0xE4);
+ASSERT_POSITION(surface_params, 0xF0);
+#undef ASSERT_POSITION
+
+#define ASSERT_POSITION(field_name, position) \
+ static_assert(offsetof(EntropyProbs, field_name) == position, \
+ "Field " #field_name " has invalid position")
+
+ASSERT_POSITION(inter_mode_prob, 0x400);
+ASSERT_POSITION(tx_8x8_prob, 0x470);
+ASSERT_POSITION(partition_prob, 0x4E0);
+ASSERT_POSITION(class_0, 0x540);
+ASSERT_POSITION(class_0_fr, 0x560);
+ASSERT_POSITION(coef_probs, 0x5A0);
+#undef ASSERT_POSITION
+
}; // namespace Decoder
}; // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
index e4f919afd..b5e3b70fc 100644
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -8,22 +8,21 @@
namespace Tegra {
-Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
+#define NVDEC_REG_INDEX(field_name) \
+ (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
+
+Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {}
Nvdec::~Nvdec() = default;
-void Nvdec::ProcessMethod(Method method, u32 argument) {
- if (method == Method::SetVideoCodec) {
- codec->StateWrite(static_cast<u32>(method), argument);
- } else {
- codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8);
- }
+void Nvdec::ProcessMethod(u32 method, u32 argument) {
+ state.reg_array[method] = static_cast<u64>(argument) << 8;
switch (method) {
- case Method::SetVideoCodec:
+ case NVDEC_REG_INDEX(set_codec_id):
codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument));
break;
- case Method::Execute:
+ case NVDEC_REG_INDEX(execute):
Execute();
break;
}
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
index e66be80b8..6e1da0b04 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/command_classes/nvdec.h
@@ -14,16 +14,11 @@ class GPU;
class Nvdec {
public:
- enum class Method : u32 {
- SetVideoCodec = 0x80,
- Execute = 0xc0,
- };
-
explicit Nvdec(GPU& gpu);
~Nvdec();
/// Writes the method into the state, Invoke Execute() if encountered
- void ProcessMethod(Method method, u32 argument);
+ void ProcessMethod(u32 method, u32 argument);
/// Return most recently decoded frame
[[nodiscard]] AVFramePtr GetFrame();
@@ -33,6 +28,7 @@ private:
void Execute();
GPU& gpu;
+ NvdecCommon::NvdecRegisters state;
std::unique_ptr<Codec> codec;
};
} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h
index 01b5e086d..6a24e00a0 100644
--- a/src/video_core/command_classes/nvdec_common.h
+++ b/src/video_core/command_classes/nvdec_common.h
@@ -4,40 +4,13 @@
#pragma once
+#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Tegra::NvdecCommon {
-struct NvdecRegisters {
- INSERT_PADDING_WORDS(256);
- u64 set_codec_id{};
- INSERT_PADDING_WORDS(254);
- u64 set_platform_id{};
- u64 picture_info_offset{};
- u64 frame_bitstream_offset{};
- u64 frame_number{};
- u64 h264_slice_data_offsets{};
- u64 h264_mv_dump_offset{};
- INSERT_PADDING_WORDS(6);
- u64 frame_stats_offset{};
- u64 h264_last_surface_luma_offset{};
- u64 h264_last_surface_chroma_offset{};
- std::array<u64, 17> surface_luma_offset{};
- std::array<u64, 17> surface_chroma_offset{};
- INSERT_PADDING_WORDS(132);
- u64 vp9_entropy_probs_offset{};
- u64 vp9_backward_updates_offset{};
- u64 vp9_last_frame_segmap_offset{};
- u64 vp9_curr_frame_segmap_offset{};
- INSERT_PADDING_WORDS(2);
- u64 vp9_last_frame_mvs_offset{};
- u64 vp9_curr_frame_mvs_offset{};
- INSERT_PADDING_WORDS(2);
-};
-static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
-
-enum class VideoCodec : u32 {
+enum class VideoCodec : u64 {
None = 0x0,
H264 = 0x3,
Vp8 = 0x5,
@@ -45,4 +18,76 @@ enum class VideoCodec : u32 {
Vp9 = 0x9,
};
+// NVDEC should use a 32-bit address space but is mapped to 64-bit;
+// doubling the sizes here compensates for that.
+struct NvdecRegisters {
+ static constexpr std::size_t NUM_REGS = 0x178;
+
+ union {
+ struct {
+ INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000
+ VideoCodec set_codec_id; ///< 0x0400
+ INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408
+ u64 execute; ///< 0x0600
+ INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608
+ struct { ///< 0x0800
+ union {
+ BitField<0, 3, VideoCodec> codec;
+ BitField<4, 1, u64> gp_timer_on;
+ BitField<13, 1, u64> mb_timer_on;
+ BitField<14, 1, u64> intra_frame_pslc;
+ BitField<17, 1, u64> all_intra_frame;
+ };
+ } control_params;
+ u64 picture_info_offset; ///< 0x0808
+ u64 frame_bitstream_offset; ///< 0x0810
+ u64 frame_number; ///< 0x0818
+ u64 h264_slice_data_offsets; ///< 0x0820
+ u64 h264_mv_dump_offset; ///< 0x0828
+ INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830
+ u64 frame_stats_offset; ///< 0x0848
+ u64 h264_last_surface_luma_offset; ///< 0x0850
+ u64 h264_last_surface_chroma_offset; ///< 0x0858
+ std::array<u64, 17> surface_luma_offset; ///< 0x0860
+ std::array<u64, 17> surface_chroma_offset; ///< 0x08E8
+ INSERT_PADDING_WORDS_NOINIT(132); ///< 0x0970
+ u64 vp9_entropy_probs_offset; ///< 0x0B80
+ u64 vp9_backward_updates_offset; ///< 0x0B88
+ u64 vp9_last_frame_segmap_offset; ///< 0x0B90
+ u64 vp9_curr_frame_segmap_offset; ///< 0x0B98
+ INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0
+ u64 vp9_last_frame_mvs_offset; ///< 0x0BA8
+ u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0
+ INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8
+ };
+ std::array<u64, NUM_REGS> reg_array;
+ };
+};
+static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
+
+#define ASSERT_REG_POSITION(field_name, position) \
+ static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64), \
+ "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(set_codec_id, 0x80);
+ASSERT_REG_POSITION(execute, 0xC0);
+ASSERT_REG_POSITION(control_params, 0x100);
+ASSERT_REG_POSITION(picture_info_offset, 0x101);
+ASSERT_REG_POSITION(frame_bitstream_offset, 0x102);
+ASSERT_REG_POSITION(frame_number, 0x103);
+ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104);
+ASSERT_REG_POSITION(frame_stats_offset, 0x109);
+ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A);
+ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B);
+ASSERT_REG_POSITION(surface_luma_offset, 0x10C);
+ASSERT_REG_POSITION(surface_chroma_offset, 0x11D);
+ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170);
+ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171);
+ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172);
+ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173);
+ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175);
+ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176);
+
+#undef ASSERT_REG_POSITION
+
} // namespace Tegra::NvdecCommon
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 0a8b82f2b..ff3db0aee 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -3,7 +3,21 @@
// Refer to the license.txt file included.
#include <array>
+
+extern "C" {
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+#include <libswscale/swscale.h>
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+}
+
#include "common/assert.h"
+#include "common/logging/log.h"
+
#include "video_core/command_classes/nvdec.h"
#include "video_core/command_classes/vic.h"
#include "video_core/engines/maxwell_3d.h"
@@ -11,10 +25,6 @@
#include "video_core/memory_manager.h"
#include "video_core/textures/decoders.h"
-extern "C" {
-#include <libswscale/swscale.h>
-}
-
namespace Tegra {
Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 0f640fdae..f26530ede 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -7,6 +7,10 @@
#include "video_core/engines/fermi_2d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/surface.h"
+
+using VideoCore::Surface::BytesPerBlock;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
namespace Tegra::Engines {
@@ -49,7 +53,7 @@ void Fermi2D::Blit() {
UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
const auto& args = regs.pixels_from_memory;
- const Config config{
+ Config config{
.operation = regs.operation,
.filter = args.sample_mode.filter,
.dst_x0 = args.dst_x0,
@@ -61,7 +65,21 @@ void Fermi2D::Blit() {
.src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
.src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
};
- if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
+ Surface src = regs.src;
+ const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
+ const auto need_align_to_pitch =
+ src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch &&
+ static_cast<s32>(src.width) == config.src_x1 &&
+ config.src_x1 > static_cast<s32>(src.pitch / bytes_per_pixel) && config.src_x0 > 0;
+ if (need_align_to_pitch) {
+ auto address = src.Address() + config.src_x0 * bytes_per_pixel;
+ src.addr_upper = static_cast<u32>(address >> 32);
+ src.addr_lower = static_cast<u32>(address);
+ src.width -= config.src_x0;
+ config.src_x1 -= config.src_x0;
+ config.src_x0 = 0;
+ }
+ if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
UNIMPLEMENTED();
}
}
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 2208e1922..c9cff7450 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -18,7 +18,10 @@ set(SHADER_FILES
vulkan_uint8.comp
)
-find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED)
+find_program(GLSLANGVALIDATOR "glslangValidator")
+if ("${GLSLANGVALIDATOR}" STREQUAL "GLSLANGVALIDATOR-NOTFOUND")
+ message(FATAL_ERROR "Required program `glslangValidator` not found.")
+endif()
set(GLSL_FLAGS "")
set(QUIET_FLAG "--quiet")
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 7124c755c..d2b9d5f2b 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -69,11 +69,16 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
} else {
UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
}
- // Flush and invalidate through the GPU interface, to be asynchronous if possible.
- const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr);
- rasterizer->UnmapMemory(*cpu_addr, size);
+ const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
+
+ for (const auto& map : submapped_ranges) {
+ // Flush and invalidate through the GPU interface, to be asynchronous if possible.
+ const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map.first);
+ ASSERT(cpu_addr);
+
+ rasterizer->UnmapMemory(*cpu_addr, map.second);
+ }
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
}
@@ -127,8 +132,14 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
//// Lock the new page
// TryLockPage(page_entry, size);
+ auto& current_page = page_table[PageEntryIndex(gpu_addr)];
- page_table[PageEntryIndex(gpu_addr)] = page_entry;
+ if ((!current_page.IsValid() && page_entry.IsValid()) ||
+ current_page.ToAddress() != page_entry.ToAddress()) {
+ rasterizer->ModifyGPUMemory(gpu_addr, size);
+ }
+
+ current_page = page_entry;
}
std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
@@ -174,6 +185,19 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
return page_entry.ToAddress() + (gpu_addr & page_mask);
}
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
+ size_t page_index{addr >> page_bits};
+ const size_t page_last{(addr + size + page_size - 1) >> page_bits};
+ while (page_index < page_last) {
+ const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+ if (page_addr && *page_addr != 0) {
+ return page_addr;
+ }
+ ++page_index;
+ }
+ return std::nullopt;
+}
+
template <typename T>
T MemoryManager::Read(GPUVAddr addr) const {
if (auto page_pointer{GetPointer(addr)}; page_pointer) {
@@ -370,4 +394,79 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
return page <= Core::Memory::PAGE_SIZE;
}
+/**
+ * Checks whether every page overlapping [gpu_addr, gpu_addr + size) translates to a non-zero
+ * CPU address and each page's CPU address is exactly page_size past the previous page's.
+ */
+bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
+    const size_t first_page{gpu_addr >> page_bits};
+    const size_t end_page{(gpu_addr + size + page_size - 1) >> page_bits};
+    std::optional<VAddr> previous{};
+    for (size_t page = first_page; page != end_page; ++page) {
+        const std::optional<VAddr> current{GpuToCpuAddress(page << page_bits)};
+        if (!current || *current == 0) {
+            // Untranslatable page inside the range
+            return false;
+        }
+        if (previous && *previous + page_size != *current) {
+            // The backing CPU addresses jump between two neighbouring pages
+            return false;
+        }
+        previous = current;
+    }
+    return true;
+}
+
+/**
+ * Checks whether every page-table entry overlapping [gpu_addr, gpu_addr + size) is valid and
+ * has a non-zero address.
+ */
+bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const {
+    const size_t first_page{gpu_addr >> page_bits};
+    const size_t end_page{(gpu_addr + size + page_size - 1) >> page_bits};
+    for (size_t page = first_page; page < end_page; ++page) {
+        const auto& entry = page_table[page];
+        const bool is_mapped = entry.IsValid() && entry.ToAddress() != 0;
+        if (!is_mapped) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/**
+ * Splits a GPU range into the sub-ranges that are mapped.
+ *
+ * The range is walked page by page. Consecutive pages are merged into one segment while they
+ * translate to a CPU address and those CPU addresses are exactly page_size apart; a page that
+ * does not translate, or a jump in CPU address, ends the current segment.
+ *
+ * @param gpu_addr Start of the GPU range (does not have to be page aligned).
+ * @param size     Length of the range in bytes.
+ * @return (GPU address, size in bytes) pairs in ascending address order. Empty when no page of
+ *         the range translates to a CPU address.
+ */
+std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
+    GPUVAddr gpu_addr, std::size_t size) const {
+    std::vector<std::pair<GPUVAddr, std::size_t>> result{};
+    size_t page_index{gpu_addr >> page_bits};
+    size_t remaining_size{size};
+    size_t page_offset{gpu_addr & page_mask};
+    std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
+    std::optional<VAddr> old_page_addr{};
+    // Grows the open segment, or opens a new one at the current position
+    const auto extend_size = [this, &last_segment, &page_index, &page_offset](std::size_t bytes) {
+        if (!last_segment) {
+            // page_offset is only non-zero on the first page; without it a segment opened
+            // there would start before the requested address
+            const GPUVAddr new_base_addr = (page_index << page_bits) + page_offset;
+            last_segment = {new_base_addr, bytes};
+        } else {
+            last_segment->second += bytes;
+        }
+    };
+    // Closes the open segment (if any) and stores it in the result
+    const auto split = [&last_segment, &result] {
+        if (last_segment) {
+            result.push_back(*last_segment);
+            last_segment = std::nullopt;
+        }
+    };
+    while (remaining_size > 0) {
+        const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
+        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+        if (!page_addr) {
+            split();
+        } else if (old_page_addr) {
+            if (*old_page_addr + page_size != *page_addr) {
+                split();
+            }
+            extend_size(num_bytes);
+        } else {
+            extend_size(num_bytes);
+        }
+        ++page_index;
+        page_offset = 0;
+        remaining_size -= num_bytes;
+        // Remember this page's translation so the next iteration can detect a jump in CPU
+        // address; it resets to nullopt after a page that did not translate
+        old_page_addr = page_addr;
+    }
+    split();
+    return result;
+}
+
} // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index b3538d503..99d13e7f6 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -76,6 +76,8 @@ public:
[[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
+ [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const;
+
template <typename T>
[[nodiscard]] T Read(GPUVAddr addr) const;
@@ -112,10 +114,28 @@ public:
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
/**
- * IsGranularRange checks if a gpu region can be simply read with a pointer.
+ * Checks if a gpu region can be simply read with a pointer.
*/
[[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
+ /**
+ * Checks if a gpu region is mapped by a single range of cpu addresses.
+ */
+ [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
+
+ /**
+ * Checks if a gpu region is mapped entirely.
+ */
+ [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const;
+
+    /**
+     * Returns a vector with the mapped subranges of a gpu region, as
+     * (gpu address, size in bytes) pairs.
+     * If the region is continuous, a single pair will be returned. If it's unmapped, an empty
+     * vector will be returned.
+     */
+    [[nodiscard]] std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(
+        GPUVAddr gpu_addr, std::size_t size) const;
+
[[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
[[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
[[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 07939432f..0cec4225b 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -87,6 +87,9 @@ public:
/// Unmap memory range
virtual void UnmapMemory(VAddr addr, u64 size) = 0;
+ /// Remap GPU memory range. This means the underlying backing memory changed
+ virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
+
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 320ee8d30..63d8ad42a 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -42,6 +42,8 @@ public:
[[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
+ [[nodiscard]] virtual std::string GetDeviceVendor() const = 0;
+
// Getter/setter functions:
// ------------------------
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 3f4532ca7..3b00614e7 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -202,13 +202,13 @@ Device::Device() {
LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
throw std::runtime_error{"Insufficient version"};
}
- const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
+ vendor_name = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();
- const bool is_nvidia = vendor == "NVIDIA Corporation";
- const bool is_amd = vendor == "ATI Technologies Inc.";
- const bool is_intel = vendor == "Intel";
+ const bool is_nvidia = vendor_name == "NVIDIA Corporation";
+ const bool is_amd = vendor_name == "ATI Technologies Inc.";
+ const bool is_intel = vendor_name == "Intel";
#ifdef __unix__
const bool is_linux = true;
@@ -275,6 +275,56 @@ Device::Device() {
}
}
+/**
+ * Maps the stored vendor string to a short upper-case identifier.
+ *
+ * @return The identifier paired with the stored vendor string in the table below, or the
+ *         vendor string itself when it has no entry.
+ */
+std::string Device::GetVendorName() const {
+    struct VendorAlias {
+        std::string_view reported;   ///< Value compared against vendor_name
+        std::string_view identifier; ///< Value returned for it
+    };
+    static constexpr VendorAlias VENDOR_ALIASES[] = {
+        {"NVIDIA Corporation", "NVIDIA"},
+        {"ATI Technologies Inc.", "AMD"},
+        // For Mesa, `Intel` is an overloaded vendor string that could mean crocus or iris.
+        // Simply return `INTEL` for those as well as the Windows driver.
+        {"Intel", "INTEL"},
+        {"Intel Open Source Technology Center", "I965"},
+        {"Mesa Project", "I915"},
+        // This vendor string is overloaded between llvmpipe, softpipe, and virgl, so just
+        // return MESA instead of one of those driver names.
+        {"Mesa/X.org", "MESA"},
+        {"AMD", "RADEONSI"},
+        {"nouveau", "NOUVEAU"},
+        {"X.Org", "R600"},
+        {"Collabora Ltd", "ZINK"},
+        {"Intel Corporation", "OPENSWR"},
+        {"Microsoft Corporation", "D3D12"},
+        // Mesa's tegra driver reports `NVIDIA`. Only present in this list because the default
+        // strategy would have returned `NVIDIA` here for this driver, the same result as the
+        // proprietary driver.
+        {"NVIDIA", "TEGRA"},
+    };
+    for (const VendorAlias& alias : VENDOR_ALIASES) {
+        if (vendor_name == alias.reported) {
+            return std::string{alias.identifier};
+        }
+    }
+    return vendor_name;
+}
+
Device::Device(std::nullptr_t) {
max_uniform_buffers.fill(std::numeric_limits<u32>::max());
uniform_buffer_alignment = 4;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index f24bd0c7b..2c2b13767 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -22,6 +22,8 @@ public:
explicit Device();
explicit Device(std::nullptr_t);
+ [[nodiscard]] std::string GetVendorName() const;
+
u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
}
@@ -130,6 +132,7 @@ private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
+ std::string vendor_name;
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
size_t uniform_buffer_alignment{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index eb8bdaa85..07ad0e205 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -611,6 +611,13 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
shader_cache.OnCPUWrite(addr, size);
}
+/// Forwards a modified GPU range to the texture cache while holding the texture cache mutex
+void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+    // The guard is released when the function returns, right after the call below
+    std::scoped_lock texture_guard{texture_cache.mutex};
+    texture_cache.UnmapGPUMemory(addr, size);
+}
+
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
if (!gpu.IsAsync()) {
gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9995a563b..482efed7a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -80,6 +80,7 @@ public:
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void UnmapMemory(VAddr addr, u64 size) override;
+ void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 23948feed..ff0f03e99 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -327,7 +327,8 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
if (format_info.is_compressed) {
return false;
}
- if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) {
+ if (std::ranges::find(ACCELERATED_FORMATS, static_cast<int>(internal_format)) ==
+ ACCELERATED_FORMATS.end()) {
return false;
}
if (format_info.compatibility_by_size) {
@@ -341,6 +342,20 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset,
VideoCommon::SubresourceLayers subresource, GLenum target) {
switch (target) {
+ case GL_TEXTURE_1D:
+ return CopyOrigin{
+ .level = static_cast<GLint>(subresource.base_level),
+ .x = static_cast<GLint>(offset.x),
+ .y = static_cast<GLint>(0),
+ .z = static_cast<GLint>(0),
+ };
+ case GL_TEXTURE_1D_ARRAY:
+ return CopyOrigin{
+ .level = static_cast<GLint>(subresource.base_level),
+ .x = static_cast<GLint>(offset.x),
+ .y = static_cast<GLint>(0),
+ .z = static_cast<GLint>(subresource.base_layer),
+ };
case GL_TEXTURE_2D_ARRAY:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
return CopyOrigin{
@@ -366,6 +381,18 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
VideoCommon::SubresourceLayers dst_subresource,
GLenum target) {
switch (target) {
+ case GL_TEXTURE_1D:
+ return CopyRegion{
+ .width = static_cast<GLsizei>(extent.width),
+ .height = static_cast<GLsizei>(1),
+ .depth = static_cast<GLsizei>(1),
+ };
+ case GL_TEXTURE_1D_ARRAY:
+ return CopyRegion{
+ .width = static_cast<GLsizei>(extent.width),
+ .height = static_cast<GLsizei>(1),
+ .depth = static_cast<GLsizei>(dst_subresource.num_layers),
+ };
case GL_TEXTURE_2D_ARRAY:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
return CopyRegion{
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index cc19a110f..0b66f8332 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -70,6 +70,10 @@ public:
return &rasterizer;
}
+ /// Returns the vendor name reported by the OpenGL device (Device::GetVendorName)
+ [[nodiscard]] std::string GetDeviceVendor() const override {
+ return device.GetVendorName();
+ }
+
private:
/// Initializes the OpenGL state and creates persistent objects.
void InitOpenGLObjects();
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index abaf1ee6a..8fb5be393 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -261,9 +261,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
- copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
+ copy.src_subresource.base_level, GL_TRUE, 0, GL_READ_ONLY, GL_RG32UI);
glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
- copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
+ copy.dst_subresource.base_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
}
program_manager.RestoreGuestCompute();
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 72071316c..d7d17e110 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -47,6 +47,10 @@ public:
return &rasterizer;
}
+ /// Returns the driver name reported by the Vulkan device (device.GetDriverName())
+ [[nodiscard]] std::string GetDeviceVendor() const override {
+ return device.GetDriverName();
+ }
+
private:
void Report() const;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 8cb65e588..0df4e1a1c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -55,8 +55,9 @@ size_t BytesPerIndex(VkIndexType index_type) {
template <typename T>
std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
- std::ranges::transform(indices, indices.begin(),
- [quad, first](u32 index) { return first + index + quad * 4; });
+ for (T& index : indices) {
+ index = static_cast<T>(first + index + quad * 4);
+ }
return indices;
}
} // Anonymous namespace
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 1c9120170..bd4d649cc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -557,6 +557,13 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
pipeline_cache.OnCPUWrite(addr, size);
}
+/// Forwards a modified GPU range to the texture cache while holding the texture cache mutex
+void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+    // The guard is released when the function returns, right after the call below
+    std::scoped_lock texture_guard{texture_cache.mutex};
+    texture_cache.UnmapGPUMemory(addr, size);
+}
+
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
if (!gpu.IsAsync()) {
gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index cb8c5c279..41459c5c5 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -72,6 +72,7 @@ public:
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void UnmapMemory(VAddr addr, u64 size) override;
+ void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index ad69d32d1..6052d148a 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -69,6 +69,9 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_
}
}
+/// Stores the given gpu address, cpu address, size and image id in the view; no other work is
+/// done here.
+ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_)
+ : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {}
+
std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
if (other_addr < gpu_addr) {
// Subresource address can't be lower than the base
@@ -82,7 +85,7 @@ std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const
if (info.type != ImageType::e3D) {
const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride);
const auto end = mip_level_offsets.begin() + info.resources.levels;
- const auto it = std::find(mip_level_offsets.begin(), end, mip_offset);
+ const auto it = std::find(mip_level_offsets.begin(), end, static_cast<u32>(mip_offset));
if (layer > info.resources.layers || it == end) {
return std::nullopt;
}
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index e326cab71..ff1feda9b 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -25,12 +25,14 @@ enum class ImageFlagBits : u32 {
Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted
Registered = 1 << 6, ///< True when the image is registered
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
+ Remapped = 1 << 8, ///< Image has been remapped.
+ Sparse = 1 << 9, ///< Image has non continous submemory.
// Garbage Collection Flags
- BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher
- ///< garbage collection priority
- Alias = 1 << 9, ///< This image has aliases and has priority on garbage
- ///< collection
+ BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher
+ ///< garbage collection priority
+ Alias = 1 << 11, ///< This image has aliases and has priority on garbage
+ ///< collection
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
@@ -57,6 +59,12 @@ struct ImageBase {
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
}
+    /// Returns true when the gpu range [overlap_gpu_addr, overlap_gpu_addr + overlap_size)
+    /// intersects [gpu_addr, gpu_addr + guest_size_bytes)
+    [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
+        // Both ends are GPU addresses, so both use GPUVAddr (as ImageMapView::OverlapsGPU does)
+        const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size;
+        const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes;
+        return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
+    }
+
void CheckBadOverlapState();
void CheckAliasState();
@@ -84,6 +92,29 @@ struct ImageBase {
std::vector<AliasedImage> aliased_images;
std::vector<ImageId> overlapping_images;
+ ImageMapId map_view_id{};
+};
+
+/// An address range (gpu address, cpu address and size in bytes) together with an image id
+struct ImageMapView {
+    explicit ImageMapView(GPUVAddr gpu_addr, VAddr cpu_addr, size_t size, ImageId image_id);
+
+    /// Returns true when [overlap_cpu_addr, overlap_cpu_addr + overlap_size) intersects
+    /// [cpu_addr, cpu_addr + size)
+    [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
+        const VAddr other_end = overlap_cpu_addr + overlap_size;
+        const VAddr own_end = cpu_addr + size;
+        const bool begins_before_other_ends = cpu_addr < other_end;
+        const bool other_begins_before_end = overlap_cpu_addr < own_end;
+        return begins_before_other_ends && other_begins_before_end;
+    }
+
+    /// Returns true when [overlap_gpu_addr, overlap_gpu_addr + overlap_size) intersects
+    /// [gpu_addr, gpu_addr + size)
+    [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
+        const GPUVAddr other_end = overlap_gpu_addr + overlap_size;
+        const GPUVAddr own_end = gpu_addr + size;
+        const bool begins_before_other_ends = gpu_addr < other_end;
+        const bool other_begins_before_end = overlap_gpu_addr < own_end;
+        return begins_before_other_ends && other_begins_before_end;
+    }
+
+    GPUVAddr gpu_addr; ///< Start of the range in gpu address space
+    VAddr cpu_addr;    ///< Start of the range in cpu address space
+    size_t size;       ///< Length of the range in bytes
+    ImageId image_id;  ///< Id of the associated image
+    bool picked{};
};
struct ImageAllocBase {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 84530a179..01de2d498 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -13,6 +13,7 @@
#include <span>
#include <type_traits>
#include <unordered_map>
+#include <unordered_set>
#include <utility>
#include <vector>
@@ -110,9 +111,6 @@ public:
/// Notify the cache that a new frame has been queued
void TickFrame();
- /// Runs the Garbage Collector.
- void RunGarbageCollector();
-
/// Return a constant reference to the given image view id
[[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
@@ -155,12 +153,13 @@ public:
/// Remove images in a region
void UnmapMemory(VAddr cpu_addr, size_t size);
+ /// Remove images in a region
+ void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
+
/// Blit an image with the given parameters
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
- const Tegra::Engines::Fermi2D::Config& copy,
- std::optional<Region2D> src_region_override = {},
- std::optional<Region2D> dst_region_override = {});
+ const Tegra::Engines::Fermi2D::Config& copy);
/// Invalidate the contents of the color buffer index
/// These contents become unspecified, the cache can assume aggressive optimizations.
@@ -193,7 +192,22 @@ public:
private:
/// Iterate over all page indices in a range
template <typename Func>
- static void ForEachPage(VAddr addr, size_t size, Func&& func) {
+    static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
+        // Calls func(page) for every page index from the page containing addr up to and
+        // including the page containing addr + size - 1. When func returns bool, a true
+        // result stops the iteration early.
+        //
+        // invoke_result_t names the call's return type. The invoke_result trait class itself
+        // is never bool, so comparing it directly left the early exit permanently disabled.
+        using FuncReturn = std::invoke_result_t<Func, u64>;
+        static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
+        const u64 page_end = (addr + size - 1) >> PAGE_BITS;
+        for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
+            if constexpr (RETURNS_BOOL) {
+                if (func(page)) {
+                    break;
+                }
+            } else {
+                func(page);
+            }
+        }
+    }
+
+ template <typename Func>
+ static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
const u64 page_end = (addr + size - 1) >> PAGE_BITS;
for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
@@ -207,6 +221,9 @@ private:
}
}
+ /// Runs the Garbage Collector.
+ void RunGarbageCollector();
+
/// Fills image_view_ids in the image views in indices
void FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
@@ -220,7 +237,7 @@ private:
FramebufferId GetFramebufferId(const RenderTargets& key);
/// Refresh the contents (pixel data) of an image
- void RefreshContents(Image& image);
+ void RefreshContents(Image& image, ImageId image_id);
/// Upload data from guest to an image
template <typename StagingBuffer>
@@ -269,6 +286,16 @@ private:
template <typename Func>
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
+ template <typename Func>
+ void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
+
+ template <typename Func>
+ void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
+
+ /// Iterates over all the images in a region calling func
+ template <typename Func>
+ void ForEachSparseSegment(ImageBase& image, Func&& func);
+
/// Find or create an image view in the given image with the passed parameters
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
@@ -279,10 +306,10 @@ private:
void UnregisterImage(ImageId image);
/// Track CPU reads and writes for image
- void TrackImage(ImageBase& image);
+ void TrackImage(ImageBase& image, ImageId image_id);
/// Stop tracking CPU reads and writes for image
- void UntrackImage(ImageBase& image);
+ void UntrackImage(ImageBase& image, ImageId image_id);
/// Delete image from the cache
void DeleteImage(ImageId image);
@@ -340,7 +367,13 @@ private:
std::unordered_map<TSCEntry, SamplerId> samplers;
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
- std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
+ std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
+ std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
+ std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
+
+ std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
+
+ VAddr virtual_invalid_space{};
bool has_deleted_images = false;
u64 total_used_memory = 0;
@@ -349,6 +382,7 @@ private:
u64 critical_memory;
SlotVector<Image> slot_images;
+ SlotVector<ImageMapView> slot_map_views;
SlotVector<ImageView> slot_image_views;
SlotVector<ImageAlloc> slot_image_allocs;
SlotVector<Sampler> slot_samplers;
@@ -459,7 +493,7 @@ void TextureCache<P>::RunGarbageCollector() {
}
}
if (True(image->flags & ImageFlagBits::Tracked)) {
- UntrackImage(*image);
+ UntrackImage(*image, image_id);
}
UnregisterImage(image_id);
DeleteImage(image_id);
@@ -658,7 +692,9 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
return;
}
image.flags |= ImageFlagBits::CpuModified;
- UntrackImage(image);
+ if (True(image.flags & ImageFlagBits::Tracked)) {
+ UntrackImage(image, image_id);
+ }
});
}
@@ -695,7 +731,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
if (True(image.flags & ImageFlagBits::Tracked)) {
- UntrackImage(image);
+ UntrackImage(image, id);
}
UnregisterImage(id);
DeleteImage(id);
@@ -703,11 +739,26 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
}
template <class P>
+void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
+    // Flags every image reported for the gpu range as Remapped and stops tracking those that
+    // are still tracked. Images that already carry the flag are left untouched.
+    // The ids are gathered first; images are only modified after the region walk is over.
+    std::vector<ImageId> region_ids;
+    ForEachImageInRegionGPU(gpu_addr, size, [&region_ids](ImageId image_id, Image&) {
+        region_ids.push_back(image_id);
+    });
+    for (const ImageId image_id : region_ids) {
+        Image& image = slot_images[image_id];
+        if (False(image.flags & ImageFlagBits::Remapped)) {
+            image.flags |= ImageFlagBits::Remapped;
+            if (True(image.flags & ImageFlagBits::Tracked)) {
+                UntrackImage(image, image_id);
+            }
+        }
+    }
+}
+
+template <class P>
void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
- const Tegra::Engines::Fermi2D::Config& copy,
- std::optional<Region2D> src_override,
- std::optional<Region2D> dst_override) {
+ const Tegra::Engines::Fermi2D::Config& copy) {
const BlitImages images = GetBlitImages(dst, src);
const ImageId dst_id = images.dst_id;
const ImageId src_id = images.src_id;
@@ -718,47 +769,25 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const ImageBase& src_image = slot_images[src_id];
// TODO: Deduplicate
- const std::optional dst_base = dst_image.TryFindBase(dst.Address());
- const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
- const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
- const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
- const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
-
- // out of bounds texture blit checking
- const bool use_override = src_override.has_value();
- const s32 src_x0 = copy.src_x0 >> src_samples_x;
- s32 src_x1 = use_override ? src_override->end.x : copy.src_x1 >> src_samples_x;
- const s32 src_y0 = copy.src_y0 >> src_samples_y;
- const s32 src_y1 = copy.src_y1 >> src_samples_y;
-
- const auto src_width = static_cast<s32>(src_image.info.size.width);
- const bool width_oob = src_x1 > src_width;
- const auto width_diff = width_oob ? src_x1 - src_width : 0;
- if (width_oob) {
- src_x1 = src_width;
- }
-
- const Region2D src_dimensions{
- Offset2D{.x = src_x0, .y = src_y0},
- Offset2D{.x = src_x1, .y = src_y1},
- };
- const auto src_region = use_override ? *src_override : src_dimensions;
-
const std::optional src_base = src_image.TryFindBase(src.Address());
const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
- const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
+ const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
+ const Region2D src_region{
+ Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
+ Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
+ };
- const s32 dst_x0 = copy.dst_x0 >> dst_samples_x;
- const s32 dst_x1 = copy.dst_x1 >> dst_samples_x;
- const s32 dst_y0 = copy.dst_y0 >> dst_samples_y;
- const s32 dst_y1 = copy.dst_y1 >> dst_samples_y;
- const Region2D dst_dimensions{
- Offset2D{.x = dst_x0, .y = dst_y0},
- Offset2D{.x = dst_x1 - width_diff, .y = dst_y1},
+ const std::optional dst_base = dst_image.TryFindBase(dst.Address());
+ const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
+ const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
+ const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
+ const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
+ const Region2D dst_region{
+ Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
+ Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
};
- const auto dst_region = use_override ? *dst_override : dst_dimensions;
// Always call this after src_framebuffer_id was queried, as the address might be invalidated.
Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
@@ -775,21 +804,6 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
copy.operation);
}
-
- if (width_oob) {
- // Continue copy of the oob region of the texture on the next row
- auto oob_src = src;
- oob_src.height++;
- const Region2D src_region_override{
- Offset2D{.x = 0, .y = src_y0 + 1},
- Offset2D{.x = width_diff, .y = src_y1 + 1},
- };
- const Region2D dst_region_override{
- Offset2D{.x = dst_x1 - width_diff, .y = dst_y0},
- Offset2D{.x = dst_x1, .y = dst_y1},
- };
- BlitImage(dst, oob_src, copy, src_region_override, dst_region_override);
- }
}
template <class P>
@@ -833,9 +847,10 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_ad
if (it == page_table.end()) {
return nullptr;
}
- const auto& image_ids = it->second;
- for (const ImageId image_id : image_ids) {
- const ImageBase& image = slot_images[image_id];
+ const auto& image_map_ids = it->second;
+ for (const ImageMapId map_id : image_map_ids) {
+ const ImageMapView& map = slot_map_views[map_id];
+ const ImageBase& image = slot_images[map.image_id];
if (image.cpu_addr != cpu_addr) {
continue;
}
@@ -915,13 +930,13 @@ bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
}
template <class P>
-void TextureCache<P>::RefreshContents(Image& image) {
+void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
if (False(image.flags & ImageFlagBits::CpuModified)) {
// Only upload modified images
return;
}
image.flags &= ~ImageFlagBits::CpuModified;
- TrackImage(image);
+ TrackImage(image, image_id);
if (image.info.num_samples > 1) {
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
@@ -958,7 +973,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
template <class P>
ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
- if (!IsValidAddress(gpu_memory, config)) {
+ if (!IsValidEntry(gpu_memory, config)) {
return NULL_IMAGE_VIEW_ID;
}
const auto [pair, is_new] = image_views.try_emplace(config);
@@ -1000,14 +1015,20 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
template <class P>
ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) {
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
- return ImageId{};
+ cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
+ if (!cpu_addr) {
+ return ImageId{};
+ }
}
const bool broken_views = runtime.HasBrokenTextureViewFormats();
const bool native_bgr = runtime.HasNativeBgr();
ImageId image_id;
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
+ if (True(existing_image.flags & ImageFlagBits::Remapped)) {
+ return false;
+ }
if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
const bool strict_size = False(options & RelaxedOptions::Size) &&
True(existing_image.flags & ImageFlagBits::Strong);
@@ -1033,7 +1054,16 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
template <class P>
ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) {
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr) {
+ const auto size = CalculateGuestSizeInBytes(info);
+ cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
+ if (!cpu_addr) {
+ const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
+ virtual_invalid_space += Common::AlignUp(size, 32);
+ cpu_addr = std::optional<VAddr>(fake_addr);
+ }
+ }
ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
const Image& image = slot_images[image_id];
@@ -1053,11 +1083,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
const bool broken_views = runtime.HasBrokenTextureViewFormats();
const bool native_bgr = runtime.HasNativeBgr();
std::vector<ImageId> overlap_ids;
+ std::unordered_set<ImageId> overlaps_found;
std::vector<ImageId> left_aliased_ids;
std::vector<ImageId> right_aliased_ids;
+ std::unordered_set<ImageId> ignore_textures;
std::vector<ImageId> bad_overlap_ids;
- ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
- if (info.type != overlap.info.type) {
+ const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
+ if (True(overlap.flags & ImageFlagBits::Remapped)) {
+ ignore_textures.insert(overlap_id);
return;
}
if (info.type == ImageType::Linear) {
@@ -1067,6 +1100,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
}
return;
}
+ overlaps_found.insert(overlap_id);
static constexpr bool strict_size = true;
const std::optional<OverlapResult> solution = ResolveOverlap(
new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
@@ -1090,12 +1124,40 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
bad_overlap_ids.push_back(overlap_id);
overlap.flags |= ImageFlagBits::BadOverlap;
}
- });
+ };
+ ForEachImageInRegion(cpu_addr, size_bytes, region_check);
+ const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
+ if (!overlaps_found.contains(overlap_id)) {
+ if (True(overlap.flags & ImageFlagBits::Remapped)) {
+ ignore_textures.insert(overlap_id);
+ }
+ if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
+ ignore_textures.insert(overlap_id);
+ }
+ }
+ };
+ ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
Image& new_image = slot_images[new_image_id];
+ if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
+ new_image.flags |= ImageFlagBits::Sparse;
+ }
+
+ for (const ImageId overlap_id : ignore_textures) {
+ Image& overlap = slot_images[overlap_id];
+ if (True(overlap.flags & ImageFlagBits::GpuModified)) {
+ UNIMPLEMENTED();
+ }
+ if (True(overlap.flags & ImageFlagBits::Tracked)) {
+ UntrackImage(overlap, overlap_id);
+ }
+ UnregisterImage(overlap_id);
+ DeleteImage(overlap_id);
+ }
+
// TODO: Only upload what we need
- RefreshContents(new_image);
+ RefreshContents(new_image, new_image_id);
for (const ImageId overlap_id : overlap_ids) {
Image& overlap = slot_images[overlap_id];
@@ -1107,7 +1169,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
runtime.CopyImage(new_image, overlap, copies);
}
if (True(overlap.flags & ImageFlagBits::Tracked)) {
- UntrackImage(overlap);
+ UntrackImage(overlap, overlap_id);
}
UnregisterImage(overlap_id);
DeleteImage(overlap_id);
@@ -1242,7 +1304,8 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
boost::container::small_vector<ImageId, 32> images;
- ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
+ boost::container::small_vector<ImageMapId, 32> maps;
+ ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) {
const auto it = page_table.find(page);
if (it == page_table.end()) {
if constexpr (BOOL_BREAK) {
@@ -1251,12 +1314,105 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
return;
}
}
+ for (const ImageMapId map_id : it->second) {
+ ImageMapView& map = slot_map_views[map_id];
+ if (map.picked) {
+ continue;
+ }
+ if (!map.Overlaps(cpu_addr, size)) {
+ continue;
+ }
+ map.picked = true;
+ maps.push_back(map_id);
+ Image& image = slot_images[map.image_id];
+ if (True(image.flags & ImageFlagBits::Picked)) {
+ continue;
+ }
+ image.flags |= ImageFlagBits::Picked;
+ images.push_back(map.image_id);
+ if constexpr (BOOL_BREAK) {
+ if (func(map.image_id, image)) {
+ return true;
+ }
+ } else {
+ func(map.image_id, image);
+ }
+ }
+ if constexpr (BOOL_BREAK) {
+ return false;
+ }
+ });
+ for (const ImageId image_id : images) {
+ slot_images[image_id].flags &= ~ImageFlagBits::Picked;
+ }
+ for (const ImageMapId map_id : maps) {
+ slot_map_views[map_id].picked = false;
+ }
+}
+
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
+ using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+ static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+ boost::container::small_vector<ImageId, 8> images;
+ ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
+ const auto it = gpu_page_table.find(page);
+ if (it == gpu_page_table.end()) {
+ if constexpr (BOOL_BREAK) {
+ return false;
+ } else {
+ return;
+ }
+ }
+ for (const ImageId image_id : it->second) {
+ Image& image = slot_images[image_id];
+ if (True(image.flags & ImageFlagBits::Picked)) {
+ continue;
+ }
+ if (!image.OverlapsGPU(gpu_addr, size)) {
+ continue;
+ }
+ image.flags |= ImageFlagBits::Picked;
+ images.push_back(image_id);
+ if constexpr (BOOL_BREAK) {
+ if (func(image_id, image)) {
+ return true;
+ }
+ } else {
+ func(image_id, image);
+ }
+ }
+ if constexpr (BOOL_BREAK) {
+ return false;
+ }
+ });
+ for (const ImageId image_id : images) {
+ slot_images[image_id].flags &= ~ImageFlagBits::Picked;
+ }
+}
+
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
+ using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+ static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+ boost::container::small_vector<ImageId, 8> images;
+ ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
+ const auto it = sparse_page_table.find(page);
+ if (it == sparse_page_table.end()) {
+ if constexpr (BOOL_BREAK) {
+ return false;
+ } else {
+ return;
+ }
+ }
for (const ImageId image_id : it->second) {
Image& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::Picked)) {
continue;
}
- if (!image.Overlaps(cpu_addr, size)) {
+ if (!image.OverlapsGPU(gpu_addr, size)) {
continue;
}
image.flags |= ImageFlagBits::Picked;
@@ -1279,6 +1435,27 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
}
template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
+ using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
+ static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
+ const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
+ for (auto& segment : segments) {
+ const auto gpu_addr = segment.first;
+ const auto size = segment.second;
+ std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ ASSERT(cpu_addr);
+ if constexpr (RETURNS_BOOL) {
+ if (func(gpu_addr, *cpu_addr, size)) {
+ break;
+ }
+ } else {
+ func(gpu_addr, *cpu_addr, size);
+ }
+ }
+}
+
+template <class P>
ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
Image& image = slot_images[image_id];
if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
@@ -1295,8 +1472,6 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
"Trying to register an already registered image");
image.flags |= ImageFlagBits::Registered;
- ForEachPage(image.cpu_addr, image.guest_size_bytes,
- [this, image_id](u64 page) { page_table[page].push_back(image_id); });
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
if ((IsPixelFormatASTC(image.info.format) &&
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
@@ -1304,6 +1479,27 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory += Common::AlignUp(tentative_size, 1024);
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+ [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
+ if (False(image.flags & ImageFlagBits::Sparse)) {
+ auto map_id =
+ slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
+ ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
+ [this, map_id](u64 page) { page_table[page].push_back(map_id); });
+ image.map_view_id = map_id;
+ return;
+ }
+ std::vector<ImageViewId> sparse_maps{};
+ ForEachSparseSegment(
+ image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
+ auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
+ ForEachCPUPage(cpu_addr, size,
+ [this, map_id](u64 page) { page_table[page].push_back(map_id); });
+ sparse_maps.push_back(map_id);
+ });
+ sparse_views.emplace(image_id, std::move(sparse_maps));
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+ [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
}
template <class P>
@@ -1320,34 +1516,125 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory -= Common::AlignUp(tentative_size, 1024);
- ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
- const auto page_it = page_table.find(page);
- if (page_it == page_table.end()) {
- UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
- return;
- }
- std::vector<ImageId>& image_ids = page_it->second;
- const auto vector_it = std::ranges::find(image_ids, image_id);
- if (vector_it == image_ids.end()) {
- UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS);
- return;
- }
- image_ids.erase(vector_it);
+ const auto& clear_page_table =
+ [this, image_id](
+ u64 page,
+ std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
+ const auto page_it = selected_page_table.find(page);
+ if (page_it == selected_page_table.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+ return;
+ }
+ std::vector<ImageId>& image_ids = page_it->second;
+ const auto vector_it = std::ranges::find(image_ids, image_id);
+ if (vector_it == image_ids.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
+ page << PAGE_BITS);
+ return;
+ }
+ image_ids.erase(vector_it);
+ };
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+ [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
+ if (False(image.flags & ImageFlagBits::Sparse)) {
+ const auto map_id = image.map_view_id;
+ ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
+ const auto page_it = page_table.find(page);
+ if (page_it == page_table.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+ return;
+ }
+ std::vector<ImageMapId>& image_map_ids = page_it->second;
+ const auto vector_it = std::ranges::find(image_map_ids, map_id);
+ if (vector_it == image_map_ids.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
+ page << PAGE_BITS);
+ return;
+ }
+ image_map_ids.erase(vector_it);
+ });
+ slot_map_views.erase(map_id);
+ return;
+ }
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
+ clear_page_table(page, sparse_page_table);
});
+ auto it = sparse_views.find(image_id);
+ ASSERT(it != sparse_views.end());
+ auto& sparse_maps = it->second;
+ for (auto& map_view_id : sparse_maps) {
+ const auto& map_range = slot_map_views[map_view_id];
+ const VAddr cpu_addr = map_range.cpu_addr;
+ const std::size_t size = map_range.size;
+ ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
+ const auto page_it = page_table.find(page);
+ if (page_it == page_table.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+ return;
+ }
+ std::vector<ImageMapId>& image_map_ids = page_it->second;
+ auto vector_it = image_map_ids.begin();
+ while (vector_it != image_map_ids.end()) {
+ ImageMapView& map = slot_map_views[*vector_it];
+ if (map.image_id != image_id) {
+ vector_it++;
+ continue;
+ }
+ if (!map.picked) {
+ map.picked = true;
+ }
+ vector_it = image_map_ids.erase(vector_it);
+ }
+ });
+ slot_map_views.erase(map_view_id);
+ }
+ sparse_views.erase(it);
}
template <class P>
-void TextureCache<P>::TrackImage(ImageBase& image) {
+void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
ASSERT(False(image.flags & ImageFlagBits::Tracked));
image.flags |= ImageFlagBits::Tracked;
- rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+ if (False(image.flags & ImageFlagBits::Sparse)) {
+ rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+ return;
+ }
+ if (True(image.flags & ImageFlagBits::Registered)) {
+ auto it = sparse_views.find(image_id);
+ ASSERT(it != sparse_views.end());
+ auto& sparse_maps = it->second;
+ for (auto& map_view_id : sparse_maps) {
+ const auto& map = slot_map_views[map_view_id];
+ const VAddr cpu_addr = map.cpu_addr;
+ const std::size_t size = map.size;
+ rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+ }
+ return;
+ }
+ ForEachSparseSegment(image,
+ [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
+ rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+ });
}
template <class P>
-void TextureCache<P>::UntrackImage(ImageBase& image) {
+void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
ASSERT(True(image.flags & ImageFlagBits::Tracked));
image.flags &= ~ImageFlagBits::Tracked;
- rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+ if (False(image.flags & ImageFlagBits::Sparse)) {
+ rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+ return;
+ }
+ ASSERT(True(image.flags & ImageFlagBits::Registered));
+ auto it = sparse_views.find(image_id);
+ ASSERT(it != sparse_views.end());
+ auto& sparse_maps = it->second;
+ for (auto& map_view_id : sparse_maps) {
+ const auto& map = slot_map_views[map_view_id];
+ const VAddr cpu_addr = map.cpu_addr;
+ const std::size_t size = map.size;
+ rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
+ }
}
template <class P>
@@ -1489,10 +1776,10 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
if (invalidate) {
image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
if (False(image.flags & ImageFlagBits::Tracked)) {
- TrackImage(image);
+ TrackImage(image, image_id);
}
} else {
- RefreshContents(image);
+ RefreshContents(image, image_id);
SynchronizeAliases(image_id);
}
if (is_modification) {
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index c9571f7e4..9fbdc1ac6 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -16,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14;
constexpr SlotId CORRUPT_ID{0xfffffffe};
using ImageId = SlotId;
+using ImageMapId = SlotId;
using ImageViewId = SlotId;
using ImageAllocId = SlotId;
using SamplerId = SlotId;
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 4efe042b6..c872517b8 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -394,7 +394,7 @@ template <u32 GOB_EXTENT>
const s32 mip_offset = diff % layer_stride;
const std::array offsets = CalculateMipLevelOffsets(new_info);
const auto end = offsets.begin() + new_info.resources.levels;
- const auto it = std::find(offsets.begin(), end, mip_offset);
+ const auto it = std::find(offsets.begin(), end, static_cast<u32>(mip_offset));
if (it == end) {
// Mipmap is not aligned to any valid size
return std::nullopt;
@@ -664,6 +664,16 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
return offsets;
}
+LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
+ const u32 num_levels = info.resources.levels;
+ const LevelInfo level_info = MakeLevelInfo(info);
+ LevelArray sizes{};
+ for (u32 level = 0; level < num_levels; ++level) {
+ sizes[level] = CalculateLevelSize(level_info, level);
+ }
+ return sizes;
+}
+
std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
ASSERT(info.type == ImageType::e3D);
std::vector<u32> offsets;
@@ -776,14 +786,20 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
return copies;
}
-bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
- if (config.Address() == 0) {
+bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
+ const GPUVAddr address = config.Address();
+ if (address == 0) {
return false;
}
- if (config.Address() > (u64(1) << 48)) {
+ if (address > (1ULL << 48)) {
return false;
}
- return gpu_memory.GpuToCpuAddress(config.Address()).has_value();
+ if (gpu_memory.GpuToCpuAddress(address).has_value()) {
+ return true;
+ }
+ const ImageInfo info{config};
+ const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
+ return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
}
std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index cdc5cbc75..766502908 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -40,6 +40,8 @@ struct OverlapResult {
[[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept;
+[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
+
[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
@@ -55,7 +57,7 @@ struct OverlapResult {
const ImageInfo& src,
SubresourceBase base);
-[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
+[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
GPUVAddr gpu_addr, const ImageInfo& info,
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 7b756ba41..3ab500760 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -1365,8 +1365,8 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
// each partition.
// Determine partitions, partition index, and color endpoint modes
- s32 planeIdx = -1;
- u32 partitionIndex;
+ u32 planeIdx{UINT32_MAX};
+ u32 partitionIndex{};
u32 colorEndpointMode[4] = {0, 0, 0, 0};
// Define color data.
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 23814afd2..f214510da 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -532,6 +532,27 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want
return (supported_usage & wanted_usage) == wanted_usage;
}
+std::string Device::GetDriverName() const {
+ switch (driver_id) {
+ case VK_DRIVER_ID_AMD_PROPRIETARY:
+ return "AMD";
+ case VK_DRIVER_ID_AMD_OPEN_SOURCE:
+ return "AMDVLK";
+ case VK_DRIVER_ID_MESA_RADV:
+ return "RADV";
+ case VK_DRIVER_ID_NVIDIA_PROPRIETARY:
+ return "NVIDIA";
+ case VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS:
+ return "INTEL";
+ case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA:
+ return "ANV";
+ case VK_DRIVER_ID_MESA_LLVMPIPE:
+ return "LAVAPIPE";
+ default:
+ return vendor_name;
+ }
+}
+
void Device::CheckSuitability(bool requires_swapchain) const {
std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
bool has_swapchain = false;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 88b298196..96c0f8c60 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -45,6 +45,9 @@ public:
/// Reports a shader to Nsight Aftermath.
void SaveShader(const std::vector<u32>& spirv) const;
+ /// Returns the name of the VkDriverId reported from Vulkan.
+ std::string GetDriverName() const;
+
/// Returns the dispatch loader with direct function pointers of the device.
const vk::DeviceDispatch& GetDispatchLoader() const {
return dld;
diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp
index efdc6aa50..7a6f84d96 100644
--- a/src/yuzu/debugger/profiler.cpp
+++ b/src/yuzu/debugger/profiler.cpp
@@ -143,24 +143,25 @@ void MicroProfileWidget::hideEvent(QHideEvent* ev) {
}
void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) {
- MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0);
+ MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0);
ev->accept();
}
void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) {
- MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0);
+ MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0);
MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
ev->accept();
}
void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) {
- MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0);
+ MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0);
MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
ev->accept();
}
void MicroProfileWidget::wheelEvent(QWheelEvent* ev) {
- MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, ev->delta() / 120);
+ MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale,
+ ev->angleDelta().y() / 120);
ev->accept();
}
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index 9c5aeb833..218b4782b 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -522,7 +522,9 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration"));
remove_menu->addSeparator();
QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents"));
- QAction* dump_romfs = context_menu.addAction(tr("Dump RomFS"));
+ QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS"));
+ QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS"));
+ QAction* dump_romfs_sdmc = dump_romfs_menu->addAction(tr("Dump RomFS to SDMC"));
QAction* copy_tid = context_menu.addAction(tr("Copy Title ID to Clipboard"));
QAction* navigate_to_gamedb_entry = context_menu.addAction(tr("Navigate to GameDB entry"));
context_menu.addSeparator();
@@ -571,8 +573,12 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() {
emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path);
});
- connect(dump_romfs, &QAction::triggered,
- [this, program_id, path]() { emit DumpRomFSRequested(program_id, path); });
+ connect(dump_romfs, &QAction::triggered, [this, program_id, path]() {
+ emit DumpRomFSRequested(program_id, path, DumpRomFSTarget::Normal);
+ });
+ connect(dump_romfs_sdmc, &QAction::triggered, [this, program_id, path]() {
+ emit DumpRomFSRequested(program_id, path, DumpRomFSTarget::SDMC);
+ });
connect(copy_tid, &QAction::triggered,
[this, program_id]() { emit CopyTIDRequested(program_id); });
connect(navigate_to_gamedb_entry, &QAction::triggered, [this, program_id]() {
diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h
index b630e34ff..50402da51 100644
--- a/src/yuzu/game_list.h
+++ b/src/yuzu/game_list.h
@@ -45,6 +45,11 @@ enum class GameListRemoveTarget {
CustomConfiguration,
};
+enum class DumpRomFSTarget {
+ Normal,
+ SDMC,
+};
+
enum class InstalledEntryType {
Game,
Update,
@@ -92,7 +97,7 @@ signals:
void RemoveInstalledEntryRequested(u64 program_id, InstalledEntryType type);
void RemoveFileRequested(u64 program_id, GameListRemoveTarget target,
const std::string& game_path);
- void DumpRomFSRequested(u64 program_id, const std::string& game_path);
+ void DumpRomFSRequested(u64 program_id, const std::string& game_path, DumpRomFSTarget target);
void CopyTIDRequested(u64 program_id);
void NavigateToGamedbEntryRequested(u64 program_id,
const CompatibilityList& compatibility_list);
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index cb9d7a863..5ed3b90b8 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -104,6 +104,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include "input_common/main.h"
#include "util/overlay_dialog.h"
#include "video_core/gpu.h"
+#include "video_core/renderer_base.h"
#include "video_core/shader_notify.h"
#include "yuzu/about_dialog.h"
#include "yuzu/bootmanager.h"
@@ -1426,8 +1427,12 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index, S
title_name = Common::FS::PathToUTF8String(
std::filesystem::path{filename.toStdU16String()}.filename());
}
+ const bool is_64bit = system.Kernel().CurrentProcess()->Is64BitProcess();
+ const auto instruction_set_suffix = is_64bit ? " (64-bit)" : " (32-bit)";
+ title_name += instruction_set_suffix;
LOG_INFO(Frontend, "Booting game: {:016X} | {} | {}", title_id, title_name, title_version);
- UpdateWindowTitle(title_name, title_version);
+ const auto gpu_vendor = system.GPU().Renderer().GetDeviceVendor();
+ UpdateWindowTitle(title_name, title_version, gpu_vendor);
loading_screen->Prepare(system.GetAppLoader());
loading_screen->show();
@@ -1881,7 +1886,8 @@ void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& g
}
}
-void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path) {
+void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path,
+ DumpRomFSTarget target) {
const auto failed = [this] {
QMessageBox::warning(this, tr("RomFS Extraction Failed!"),
tr("There was an error copying the RomFS files or the user "
@@ -1909,7 +1915,10 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa
return;
}
- const auto dump_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir);
+ const auto dump_dir =
+ target == DumpRomFSTarget::Normal
+ ? Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)
+ : Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir) / "atmosphere" / "contents";
const auto romfs_dir = fmt::format("{:016X}/romfs", *romfs_title_id);
const auto path = Common::FS::PathToUTF8String(dump_dir / romfs_dir);
@@ -1919,7 +1928,8 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa
if (*romfs_title_id == program_id) {
const u64 ivfc_offset = loader->ReadRomFSIVFCOffset();
const FileSys::PatchManager pm{program_id, system.GetFileSystemController(), installed};
- romfs = pm.PatchRomFS(file, ivfc_offset, FileSys::ContentRecordType::Program);
+ romfs =
+ pm.PatchRomFS(file, ivfc_offset, FileSys::ContentRecordType::Program, nullptr, false);
} else {
romfs = installed.GetEntry(*romfs_title_id, FileSys::ContentRecordType::Data)->GetRomFS();
}
@@ -2858,8 +2868,8 @@ void GMainWindow::MigrateConfigFiles() {
}
}
-void GMainWindow::UpdateWindowTitle(const std::string& title_name,
- const std::string& title_version) {
+void GMainWindow::UpdateWindowTitle(std::string_view title_name, std::string_view title_version,
+ std::string_view gpu_vendor) {
const auto branch_name = std::string(Common::g_scm_branch);
const auto description = std::string(Common::g_scm_desc);
const auto build_id = std::string(Common::g_build_id);
@@ -2872,7 +2882,8 @@ void GMainWindow::UpdateWindowTitle(const std::string& title_name,
if (title_name.empty()) {
setWindowTitle(QString::fromStdString(window_title));
} else {
- const auto run_title = fmt::format("{} | {} | {}", window_title, title_name, title_version);
+ const auto run_title =
+ fmt::format("{} | {} | {} | {}", window_title, title_name, title_version, gpu_vendor);
setWindowTitle(QString::fromStdString(run_title));
}
}
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 11f152cbe..45c8310e1 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -34,6 +34,7 @@ class QProgressDialog;
class WaitTreeWidget;
enum class GameListOpenTarget;
enum class GameListRemoveTarget;
+enum class DumpRomFSTarget;
enum class InstalledEntryType;
class GameListPlaceholder;
@@ -244,7 +245,7 @@ private slots:
void OnGameListRemoveInstalledEntry(u64 program_id, InstalledEntryType type);
void OnGameListRemoveFile(u64 program_id, GameListRemoveTarget target,
const std::string& game_path);
- void OnGameListDumpRomFS(u64 program_id, const std::string& game_path);
+ void OnGameListDumpRomFS(u64 program_id, const std::string& game_path, DumpRomFSTarget target);
void OnGameListCopyTID(u64 program_id);
void OnGameListNavigateToGamedbEntry(u64 program_id,
const CompatibilityList& compatibility_list);
@@ -287,8 +288,8 @@ private:
InstallResult InstallNSPXCI(const QString& filename);
InstallResult InstallNCA(const QString& filename);
void MigrateConfigFiles();
- void UpdateWindowTitle(const std::string& title_name = {},
- const std::string& title_version = {});
+ void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {},
+ std::string_view gpu_vendor = {});
void UpdateStatusBar();
void UpdateStatusButtons();
void UpdateUISettings();