diff options
Diffstat (limited to 'src/video_core')
39 files changed, 1293 insertions, 502 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f9454bbaa..e4de55f4d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -292,12 +292,12 @@ endif() if (MSVC) target_compile_options(video_core PRIVATE - /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data + /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data + /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data /we4456 # Declaration of 'identifier' hides previous local declaration /we4457 # Declaration of 'identifier' hides function parameter /we4458 # Declaration of 'identifier' hides class member /we4459 # Declaration of 'identifier' hides global declaration - /we4715 # 'function' : not all control paths return a value ) else() target_compile_options(video_core PRIVATE diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 9d726a6fb..cad7f902d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -99,7 +99,7 @@ class BufferCache { }; public: - static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4_KiB; + static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -109,8 +109,6 @@ public: void TickFrame(); - void RunGarbageCollector(); - void WriteMemory(VAddr cpu_addr, u64 size); void CachedWriteMemory(VAddr cpu_addr, u64 size); @@ -197,6 +195,8 @@ private: ((cpu_addr + size) & ~Core::Memory::PAGE_MASK); } + void RunGarbageCollector(); + void BindHostIndexBuffer(); void BindHostVertexBuffers(); @@ -416,8 +416,9 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { template <class P> void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { - ForEachBufferInRange(cpu_addr, size, - [&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); }); + ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { + DownloadBufferMemory(buffer, cpu_addr, size); + }); } template <class P> diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index a3fda1094..8b86ad050 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { case ThiMethod::SetMethod1: LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", static_cast<u32>(nvdec_thi_state.method_0)); - nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0), - data); + nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data); break; default: break; diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index d02dc6260..1b4bbc8ac 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) { av_free(ptr); } -Codec::Codec(GPU& gpu_) - : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)), +Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) + : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} Codec::~Codec() { @@ -43,46 +43,48 @@ Codec::~Codec() { avcodec_close(av_codec_ctx); } +void Codec::Initialize() { + AVCodecID codec{AV_CODEC_ID_NONE}; + switch (current_codec) { + case NvdecCommon::VideoCodec::H264: + codec = AV_CODEC_ID_H264; + break; + case NvdecCommon::VideoCodec::Vp9: + codec = AV_CODEC_ID_VP9; + break; + default: + return; + } + av_codec = avcodec_find_decoder(codec); + av_codec_ctx = avcodec_alloc_context3(av_codec); + av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); + + // TODO(ameerj): libavcodec gpu hw acceleration + + const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); + if (av_error < 0) { + LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); + avcodec_close(av_codec_ctx); + return; + } + initialized = true; + return; +} + void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { if (current_codec != codec) { - LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec)); current_codec = codec; + LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName()); } } -void Codec::StateWrite(u32 offset, u64 arguments) { - u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64); - std::memcpy(state_offset, &arguments, sizeof(u64)); -} - void Codec::Decode() { - bool is_first_frame = false; + const bool is_first_frame = !initialized; if (!initialized) { - if (current_codec == NvdecCommon::VideoCodec::H264) { - av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); - } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { - av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); - } else { - LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec); - return; - } - - av_codec_ctx = avcodec_alloc_context3(av_codec); - av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); - - // TODO(ameerj): libavcodec gpu hw acceleration - - const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); - if (av_error < 0) { - LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); - avcodec_close(av_codec_ctx); - return; - } - initialized = true; - is_first_frame = true; + Initialize(); } - bool vp9_hidden_frame = false; + bool vp9_hidden_frame = false; AVPacket packet{}; av_init_packet(&packet); std::vector<u8> frame_data; @@ -95,7 +97,7 @@ void Codec::Decode() { } packet.data = frame_data.data(); - packet.size = static_cast<int>(frame_data.size()); + packet.size = static_cast<s32>(frame_data.size()); avcodec_send_packet(av_codec_ctx, &packet); @@ -127,4 +129,21 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { return current_codec; } +std::string_view Codec::GetCurrentCodecName() const { + switch (current_codec) { + case NvdecCommon::VideoCodec::None: + return "None"; + case NvdecCommon::VideoCodec::H264: + return "H264"; + case NvdecCommon::VideoCodec::Vp8: + return "VP8"; + case NvdecCommon::VideoCodec::H265: + return "H265"; + case NvdecCommon::VideoCodec::Vp9: + return "VP9"; + default: + return "Unknown"; + } +}; + } // namespace Tegra diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 8a2a6c360..96c823c76 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -34,15 +34,15 @@ class VP9; class Codec { public: - explicit Codec(GPU& gpu); + explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs); ~Codec(); + /// Initialize the codec, returning success or failure + void Initialize(); + /// Sets NVDEC video stream codec void SetTargetCodec(NvdecCommon::VideoCodec codec); - /// Populate NvdecRegisters state with argument value at the provided offset - void StateWrite(u32 offset, u64 arguments); - /// Call decoders to construct headers, decode AVFrame with ffmpeg void Decode(); @@ -51,6 +51,8 @@ public: /// Returns the value of current_codec [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; + /// Return name of the current codec + [[nodiscard]] std::string_view GetCurrentCodecName() const; private: bool initialized{}; @@ -60,10 +62,10 @@ private: AVCodecContext* av_codec_ctx{nullptr}; GPU& gpu; + const NvdecCommon::NvdecRegisters& state; std::unique_ptr<Decoder::H264> h264_decoder; std::unique_ptr<Decoder::VP9> vp9_decoder; - NvdecCommon::NvdecRegisters state{}; std::queue<AVFramePtr> av_frames{}; }; diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index fea6aed98..5fb6d45ee 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -45,134 +45,129 @@ H264::~H264() = default; const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state, bool is_first_frame) { - H264DecoderContext context{}; + H264DecoderContext context; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); - const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff); + const s64 frame_number = context.h264_parameter_set.frame_number.Value(); if (!is_first_frame && frame_number != 0) { - frame.resize(context.frame_data_size); - + frame.resize(context.stream_len); gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); - } else { - /// Encode header - H264BitWriter writer{}; - writer.WriteU(1, 24); - writer.WriteU(0, 1); - writer.WriteU(3, 2); - writer.WriteU(7, 5); - writer.WriteU(100, 8); - writer.WriteU(0, 8); - writer.WriteU(31, 8); - writer.WriteUe(0); - const auto chroma_format_idc = - static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3); - writer.WriteUe(chroma_format_idc); - if (chroma_format_idc == 3) { - writer.WriteBit(false); - } - - writer.WriteUe(0); - writer.WriteUe(0); - writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag - writer.WriteBit(false); // Scaling matrix present flag - - const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3); - writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf)); - writer.WriteUe(order_cnt_type); - if (order_cnt_type == 0) { - writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt); - } else if (order_cnt_type == 1) { - writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); - - writer.WriteSe(0); - writer.WriteSe(0); - writer.WriteUe(0); - } - - const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units / - (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); + return frame; + } - writer.WriteUe(16); + // Encode header + H264BitWriter writer{}; + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(7, 5); + writer.WriteU(100, 8); + writer.WriteU(0, 8); + writer.WriteU(31, 8); + writer.WriteUe(0); + const u32 chroma_format_idc = + static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value()); + writer.WriteUe(chroma_format_idc); + if (chroma_format_idc == 3) { writer.WriteBit(false); - writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); - writer.WriteUe(pic_height - 1); - writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); - - if (!context.h264_parameter_set.frame_mbs_only_flag) { - writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0); - } + } - writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0); - writer.WriteBit(false); // Frame cropping flag - writer.WriteBit(false); // VUI parameter present flag + writer.WriteUe(0); + writer.WriteUe(0); + writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag + writer.WriteBit(false); // Scaling matrix present flag - writer.End(); + writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value())); - // H264 PPS - writer.WriteU(1, 24); - writer.WriteU(0, 1); - writer.WriteU(3, 2); - writer.WriteU(8, 5); + const auto order_cnt_type = + static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value()); + writer.WriteUe(order_cnt_type); + if (order_cnt_type == 0) { + writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4); + } else if (order_cnt_type == 1) { + writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); + writer.WriteSe(0); + writer.WriteSe(0); writer.WriteUe(0); - writer.WriteUe(0); + } - writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); - writer.WriteBit(false); - writer.WriteUe(0); - writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); - writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); - writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0); - writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2); - s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f); - pic_init_qp = (pic_init_qp << 26) >> 26; - writer.WriteSe(pic_init_qp); - writer.WriteSe(0); - s32 chroma_qp_index_offset = - static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f); - chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27; + const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units / + (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); + + writer.WriteUe(16); + writer.WriteBit(false); + writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); + writer.WriteUe(pic_height - 1); + writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); - writer.WriteSe(chroma_qp_index_offset); - writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0); - writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0); - writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0); - writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); + if (!context.h264_parameter_set.frame_mbs_only_flag) { + writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0); + } + writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0); + writer.WriteBit(false); // Frame cropping flag + writer.WriteBit(false); // VUI parameter present flag + + writer.End(); + + // H264 PPS + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(8, 5); + + writer.WriteUe(0); + writer.WriteUe(0); + + writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); + writer.WriteBit(false); + writer.WriteUe(0); + writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); + writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); + writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0); + writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2); + s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value()); + writer.WriteSe(pic_init_qp); + writer.WriteSe(0); + s32 chroma_qp_index_offset = + static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value()); + + writer.WriteSe(chroma_qp_index_offset); + writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0); + writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0); + writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0); + writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); + + writer.WriteBit(true); + + for (s32 index = 0; index < 6; index++) { writer.WriteBit(true); + std::span<const u8> matrix{context.weight_scale}; + writer.WriteScalingList(matrix, index * 16, 16); + } - for (s32 index = 0; index < 6; index++) { + if (context.h264_parameter_set.transform_8x8_mode_flag) { + for (s32 index = 0; index < 2; index++) { writer.WriteBit(true); - const auto matrix_x4 = - std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end()); - writer.WriteScalingList(matrix_x4, index * 16, 16); - } - - if (context.h264_parameter_set.transform_8x8_mode_flag) { - for (s32 index = 0; index < 2; index++) { - writer.WriteBit(true); - const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(), - context.scaling_matrix_8.end()); - - writer.WriteScalingList(matrix_x8, index * 64, 64); - } + std::span<const u8> matrix{context.weight_scale_8x8}; + writer.WriteScalingList(matrix, index * 64, 64); } + } - s32 chroma_qp_index_offset2 = - static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f); - chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27; + s32 chroma_qp_index_offset2 = + static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value()); - writer.WriteSe(chroma_qp_index_offset2); + writer.WriteSe(chroma_qp_index_offset2); - writer.End(); + writer.End(); - const auto& encoded_header = writer.GetByteArray(); - frame.resize(encoded_header.size() + context.frame_data_size); - std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); + const auto& encoded_header = writer.GetByteArray(); + frame.resize(encoded_header.size() + context.stream_len); + std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); - gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, - frame.data() + encoded_header.size(), - context.frame_data_size); - } + gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, + frame.data() + encoded_header.size(), context.stream_len); return frame; } @@ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) { WriteBits(state ? 1 : 0, 1); } -void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) { +void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { std::vector<u8> scan(count); if (count == 16) { std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h index 0f3a1d9f3..bfe84a472 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/command_classes/codecs/h264.h @@ -20,7 +20,9 @@ #pragma once +#include <span> #include <vector> +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/command_classes/nvdec_common.h" @@ -48,7 +50,7 @@ public: /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification /// Writes the scaling matrices of the sream - void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); + void WriteScalingList(std::span<const u8> list, s32 start, s32 count); /// Return the bitstream as a vector. [[nodiscard]] std::vector<u8>& GetByteArray(); @@ -78,40 +80,110 @@ public: const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); private: + std::vector<u8> frame; + GPU& gpu; + struct H264ParameterSet { - u32 log2_max_pic_order_cnt{}; - u32 delta_pic_order_always_zero_flag{}; - u32 frame_mbs_only_flag{}; - u32 pic_width_in_mbs{}; - u32 pic_height_in_map_units{}; - INSERT_PADDING_WORDS(1); - u32 entropy_coding_mode_flag{}; - u32 bottom_field_pic_order_flag{}; - u32 num_refidx_l0_default_active{}; - u32 num_refidx_l1_default_active{}; - u32 deblocking_filter_control_flag{}; - u32 redundant_pic_count_flag{}; - u32 transform_8x8_mode_flag{}; - INSERT_PADDING_WORDS(9); - u64 flags{}; - u32 frame_number{}; - u32 frame_number2{}; + s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00 + s32 delta_pic_order_always_zero_flag; ///< 0x04 + s32 frame_mbs_only_flag; ///< 0x08 + u32 pic_width_in_mbs; ///< 0x0C + u32 frame_height_in_map_units; ///< 0x10 + union { ///< 0x14 + BitField<0, 2, u32> tile_format; + BitField<2, 3, u32> gob_height; + }; + u32 entropy_coding_mode_flag; ///< 0x18 + s32 pic_order_present_flag; ///< 0x1C + s32 num_refidx_l0_default_active; ///< 0x20 + s32 num_refidx_l1_default_active; ///< 0x24 + s32 deblocking_filter_control_present_flag; ///< 0x28 + s32 redundant_pic_cnt_present_flag; ///< 0x2C + u32 transform_8x8_mode_flag; ///< 0x30 + u32 pitch_luma; ///< 0x34 + u32 pitch_chroma; ///< 0x38 + u32 luma_top_offset; ///< 0x3C + u32 luma_bot_offset; ///< 0x40 + u32 luma_frame_offset; ///< 0x44 + u32 chroma_top_offset; ///< 0x48 + u32 chroma_bot_offset; ///< 0x4C + u32 chroma_frame_offset; ///< 0x50 + u32 hist_buffer_size; ///< 0x54 + union { ///< 0x58 + union { + BitField<0, 1, u64> mbaff_frame; + BitField<1, 1, u64> direct_8x8_inference; + BitField<2, 1, u64> weighted_pred; + BitField<3, 1, u64> constrained_intra_pred; + BitField<4, 1, u64> ref_pic; + BitField<5, 1, u64> field_pic; + BitField<6, 1, u64> bottom_field; + BitField<7, 1, u64> second_field; + } flags; + BitField<8, 4, u64> log2_max_frame_num_minus4; + BitField<12, 2, u64> chroma_format_idc; + BitField<14, 2, u64> pic_order_cnt_type; + BitField<16, 6, s64> pic_init_qp_minus26; + BitField<22, 5, s64> chroma_qp_index_offset; + BitField<27, 5, s64> second_chroma_qp_index_offset; + BitField<32, 2, u64> weighted_bipred_idc; + BitField<34, 7, u64> curr_pic_idx; + BitField<41, 5, u64> curr_col_idx; + BitField<46, 16, u64> frame_number; + BitField<62, 1, u64> frame_surfaces; + BitField<63, 1, u64> output_memory_layout; + }; }; - static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size"); + static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size"); struct H264DecoderContext { - INSERT_PADDING_BYTES(0x48); - u32 frame_data_size{}; - INSERT_PADDING_BYTES(0xc); - H264ParameterSet h264_parameter_set{}; - INSERT_PADDING_BYTES(0x100); - std::array<u8, 0x60> scaling_matrix_4; - std::array<u8, 0x80> scaling_matrix_8; + INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000 + u32 stream_len; ///< 0x0048 + INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C + H264ParameterSet h264_parameter_set; ///< 0x0058 + INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8 + std::array<u8, 0x60> weight_scale; ///< 0x01C0 + std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220 }; - static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size"); - - std::vector<u8> frame; - GPU& gpu; + static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size"); + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(H264ParameterSet, field_name) == position, \ + "Field " #field_name " has invalid position") + + ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00); + ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04); + ASSERT_POSITION(frame_mbs_only_flag, 0x08); + ASSERT_POSITION(pic_width_in_mbs, 0x0C); + ASSERT_POSITION(frame_height_in_map_units, 0x10); + ASSERT_POSITION(tile_format, 0x14); + ASSERT_POSITION(entropy_coding_mode_flag, 0x18); + ASSERT_POSITION(pic_order_present_flag, 0x1C); + ASSERT_POSITION(num_refidx_l0_default_active, 0x20); + ASSERT_POSITION(num_refidx_l1_default_active, 0x24); + ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28); + ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C); + ASSERT_POSITION(transform_8x8_mode_flag, 0x30); + ASSERT_POSITION(pitch_luma, 0x34); + ASSERT_POSITION(pitch_chroma, 0x38); + ASSERT_POSITION(luma_top_offset, 0x3C); + ASSERT_POSITION(luma_bot_offset, 0x40); + ASSERT_POSITION(luma_frame_offset, 0x44); + ASSERT_POSITION(chroma_top_offset, 0x48); + ASSERT_POSITION(chroma_bot_offset, 0x4C); + ASSERT_POSITION(chroma_frame_offset, 0x50); + ASSERT_POSITION(hist_buffer_size, 0x54); + ASSERT_POSITION(flags, 0x58); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(H264DecoderContext, field_name) == position, \ + "Field " #field_name " has invalid position") + + ASSERT_POSITION(stream_len, 0x48); + ASSERT_POSITION(h264_parameter_set, 0x58); + ASSERT_POSITION(weight_scale, 0x1C0); +#undef ASSERT_POSITION }; } // namespace Decoder diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 29bb31418..902bc2a98 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ } Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { - PictureInfo picture_info{}; + PictureInfo picture_info; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); Vp9PictureInfo vp9_info = picture_info.Convert(); @@ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) } void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { - EntropyProbs entropy{}; + EntropyProbs entropy; gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); entropy.Convert(dst); } diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 139501a1c..2da14f3ca 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -15,10 +15,10 @@ class GPU; namespace Decoder { struct Vp9FrameDimensions { - s16 width{}; - s16 height{}; - s16 luma_pitch{}; - s16 chroma_pitch{}; + s16 width; + s16 height; + s16 luma_pitch; + s16 chroma_pitch; }; static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); @@ -49,87 +49,87 @@ enum class TxMode { }; struct Segmentation { - u8 enabled{}; - u8 update_map{}; - u8 temporal_update{}; - u8 abs_delta{}; - std::array<u32, 8> feature_mask{}; - std::array<std::array<s16, 4>, 8> feature_data{}; + u8 enabled; + u8 update_map; + u8 temporal_update; + u8 abs_delta; + std::array<u32, 8> feature_mask; + std::array<std::array<s16, 4>, 8> feature_data; }; static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); struct LoopFilter { - u8 mode_ref_delta_enabled{}; - std::array<s8, 4> ref_deltas{}; - std::array<s8, 2> mode_deltas{}; + u8 mode_ref_delta_enabled; + std::array<s8, 4> ref_deltas; + std::array<s8, 2> mode_deltas; }; static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); struct Vp9EntropyProbs { - std::array<u8, 36> y_mode_prob{}; - std::array<u8, 64> partition_prob{}; - std::array<u8, 1728> coef_probs{}; - std::array<u8, 8> switchable_interp_prob{}; - std::array<u8, 28> inter_mode_prob{}; - std::array<u8, 4> intra_inter_prob{}; - std::array<u8, 5> comp_inter_prob{}; - std::array<u8, 10> single_ref_prob{}; - std::array<u8, 5> comp_ref_prob{}; - std::array<u8, 6> tx_32x32_prob{}; - std::array<u8, 4> tx_16x16_prob{}; - std::array<u8, 2> tx_8x8_prob{}; - std::array<u8, 3> skip_probs{}; - std::array<u8, 3> joints{}; - std::array<u8, 2> sign{}; - std::array<u8, 20> classes{}; - std::array<u8, 2> class_0{}; - std::array<u8, 20> prob_bits{}; - std::array<u8, 12> class_0_fr{}; - std::array<u8, 6> fr{}; - std::array<u8, 2> class_0_hp{}; - std::array<u8, 2> high_precision{}; + std::array<u8, 36> y_mode_prob; ///< 0x0000 + std::array<u8, 64> partition_prob; ///< 0x0024 + std::array<u8, 1728> coef_probs; ///< 0x0064 + std::array<u8, 8> switchable_interp_prob; ///< 0x0724 + std::array<u8, 28> inter_mode_prob; ///< 0x072C + std::array<u8, 4> intra_inter_prob; ///< 0x0748 + std::array<u8, 5> comp_inter_prob; ///< 0x074C + std::array<u8, 10> single_ref_prob; ///< 0x0751 + std::array<u8, 5> comp_ref_prob; ///< 0x075B + std::array<u8, 6> tx_32x32_prob; ///< 0x0760 + std::array<u8, 4> tx_16x16_prob; ///< 0x0766 + std::array<u8, 2> tx_8x8_prob; ///< 0x076A + std::array<u8, 3> skip_probs; ///< 0x076C + std::array<u8, 3> joints; ///< 0x076F + std::array<u8, 2> sign; ///< 0x0772 + std::array<u8, 20> classes; ///< 0x0774 + std::array<u8, 2> class_0; ///< 0x0788 + std::array<u8, 20> prob_bits; ///< 0x078A + std::array<u8, 12> class_0_fr; ///< 0x079E + std::array<u8, 6> fr; ///< 0x07AA + std::array<u8, 2> class_0_hp; ///< 0x07B0 + std::array<u8, 2> high_precision; ///< 0x07B2 }; static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); struct Vp9PictureInfo { - bool is_key_frame{}; - bool intra_only{}; - bool last_frame_was_key{}; - bool frame_size_changed{}; - bool error_resilient_mode{}; - bool last_frame_shown{}; - bool show_frame{}; - std::array<s8, 4> ref_frame_sign_bias{}; - s32 base_q_index{}; - s32 y_dc_delta_q{}; - s32 uv_dc_delta_q{}; - s32 uv_ac_delta_q{}; - bool lossless{}; - s32 transform_mode{}; - bool allow_high_precision_mv{}; - s32 interp_filter{}; - s32 reference_mode{}; - s8 comp_fixed_ref{}; - std::array<s8, 2> comp_var_ref{}; - s32 log2_tile_cols{}; - s32 log2_tile_rows{}; - bool segment_enabled{}; - bool segment_map_update{}; - bool segment_map_temporal_update{}; - s32 segment_abs_delta{}; - std::array<u32, 8> segment_feature_enable{}; - std::array<std::array<s16, 4>, 8> segment_feature_data{}; - bool mode_ref_delta_enabled{}; - bool use_prev_in_find_mv_refs{}; - std::array<s8, 4> ref_deltas{}; - std::array<s8, 2> mode_deltas{}; - Vp9EntropyProbs entropy{}; - Vp9FrameDimensions frame_size{}; - u8 first_level{}; - u8 sharpness_level{}; - u32 bitstream_size{}; - std::array<u64, 4> frame_offsets{}; - std::array<bool, 4> refresh_frame{}; + bool is_key_frame; + bool intra_only; + bool last_frame_was_key; + bool frame_size_changed; + bool error_resilient_mode; + bool last_frame_shown; + bool show_frame; + std::array<s8, 4> ref_frame_sign_bias; + s32 base_q_index; + s32 y_dc_delta_q; + s32 uv_dc_delta_q; + s32 uv_ac_delta_q; + bool lossless; + s32 transform_mode; + bool allow_high_precision_mv; + s32 interp_filter; + s32 reference_mode; + s8 comp_fixed_ref; + std::array<s8, 2> comp_var_ref; + s32 log2_tile_cols; + s32 log2_tile_rows; + bool segment_enabled; + bool segment_map_update; + bool segment_map_temporal_update; + s32 segment_abs_delta; + std::array<u32, 8> segment_feature_enable; + std::array<std::array<s16, 4>, 8> segment_feature_data; + bool mode_ref_delta_enabled; + bool use_prev_in_find_mv_refs; + std::array<s8, 4> ref_deltas; + std::array<s8, 2> mode_deltas; + Vp9EntropyProbs entropy; + Vp9FrameDimensions frame_size; + u8 first_level; + u8 sharpness_level; + u32 bitstream_size; + std::array<u64, 4> frame_offsets; + std::array<bool, 4> refresh_frame; }; struct Vp9FrameContainer { @@ -138,35 +138,35 @@ struct Vp9FrameContainer { }; struct PictureInfo { - INSERT_PADDING_WORDS(12); - u32 bitstream_size{}; - INSERT_PADDING_WORDS(5); - Vp9FrameDimensions last_frame_size{}; - Vp9FrameDimensions golden_frame_size{}; - Vp9FrameDimensions alt_frame_size{}; - Vp9FrameDimensions current_frame_size{}; - u32 vp9_flags{}; - std::array<s8, 4> ref_frame_sign_bias{}; - u8 first_level{}; - u8 sharpness_level{}; - u8 base_q_index{}; - u8 y_dc_delta_q{}; - u8 uv_ac_delta_q{}; - u8 uv_dc_delta_q{}; - u8 lossless{}; - u8 tx_mode{}; - u8 allow_high_precision_mv{}; - u8 interp_filter{}; - u8 reference_mode{}; - s8 comp_fixed_ref{}; - std::array<s8, 2> comp_var_ref{}; - u8 log2_tile_cols{}; - u8 log2_tile_rows{}; - Segmentation segmentation{}; - LoopFilter loop_filter{}; - INSERT_PADDING_BYTES(5); - u32 surface_params{}; - INSERT_PADDING_WORDS(3); + INSERT_PADDING_WORDS_NOINIT(12); ///< 0x00 + u32 bitstream_size; ///< 0x30 + INSERT_PADDING_WORDS_NOINIT(5); ///< 0x34 + Vp9FrameDimensions last_frame_size; ///< 0x48 + Vp9FrameDimensions golden_frame_size; ///< 0x50 + Vp9FrameDimensions alt_frame_size; ///< 0x58 + Vp9FrameDimensions current_frame_size; ///< 0x60 + u32 vp9_flags; ///< 0x68 + std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C + u8 first_level; ///< 0x70 + u8 sharpness_level; ///< 0x71 + u8 base_q_index; ///< 0x72 + u8 y_dc_delta_q; ///< 0x73 + u8 uv_ac_delta_q; ///< 0x74 + u8 uv_dc_delta_q; ///< 0x75 + u8 lossless; ///< 0x76 + u8 tx_mode; ///< 0x77 + u8 allow_high_precision_mv; ///< 0x78 + u8 interp_filter; ///< 0x79 + u8 reference_mode; ///< 0x7A + s8 comp_fixed_ref; ///< 0x7B + std::array<s8, 2> comp_var_ref; ///< 0x7C + u8 log2_tile_cols; ///< 0x7E + u8 log2_tile_rows; ///< 0x7F + Segmentation segmentation; ///< 0x80 + LoopFilter loop_filter; ///< 0xE4 + INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB + u32 surface_params; ///< 0xF0 + INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4 [[nodiscard]] Vp9PictureInfo Convert() const { return { @@ -176,6 +176,7 @@ struct PictureInfo { .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, + .show_frame = false, .ref_frame_sign_bias = ref_frame_sign_bias, .base_q_index = base_q_index, .y_dc_delta_q = y_dc_delta_q, @@ -204,45 +205,48 @@ struct PictureInfo { !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), .ref_deltas = loop_filter.ref_deltas, .mode_deltas = loop_filter.mode_deltas, + .entropy{}, .frame_size = current_frame_size, .first_level = first_level, .sharpness_level = sharpness_level, .bitstream_size = bitstream_size, + .frame_offsets{}, + .refresh_frame{}, }; } }; static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); struct EntropyProbs { - INSERT_PADDING_BYTES(1024); - std::array<u8, 28> inter_mode_prob{}; - std::array<u8, 4> intra_inter_prob{}; - INSERT_PADDING_BYTES(80); - std::array<u8, 2> tx_8x8_prob{}; - std::array<u8, 4> tx_16x16_prob{}; - std::array<u8, 6> tx_32x32_prob{}; - std::array<u8, 4> y_mode_prob_e8{}; - std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{}; - INSERT_PADDING_BYTES(64); - std::array<u8, 64> partition_prob{}; - INSERT_PADDING_BYTES(10); - std::array<u8, 8> switchable_interp_prob{}; - std::array<u8, 5> comp_inter_prob{}; - std::array<u8, 3> skip_probs{}; - INSERT_PADDING_BYTES(1); - std::array<u8, 3> joints{}; - std::array<u8, 2> sign{}; - std::array<u8, 2> class_0{}; - std::array<u8, 6> fr{}; - std::array<u8, 2> class_0_hp{}; - std::array<u8, 2> high_precision{}; - std::array<u8, 20> classes{}; - std::array<u8, 12> class_0_fr{}; - std::array<u8, 20> pred_bits{}; - std::array<u8, 10> single_ref_prob{}; - std::array<u8, 5> comp_ref_prob{}; - INSERT_PADDING_BYTES(17); - std::array<u8, 2304> coef_probs{}; + INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000 + std::array<u8, 28> inter_mode_prob; ///< 0x0400 + std::array<u8, 4> intra_inter_prob; ///< 0x041C + INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420 + std::array<u8, 2> tx_8x8_prob; ///< 0x0470 + std::array<u8, 4> tx_16x16_prob; ///< 0x0472 + std::array<u8, 6> tx_32x32_prob; ///< 0x0476 + std::array<u8, 4> y_mode_prob_e8; ///< 0x047C + std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480 + INSERT_PADDING_BYTES_NOINIT(64); ///< 0x04A0 + std::array<u8, 64> partition_prob; ///< 0x04E0 + INSERT_PADDING_BYTES_NOINIT(10); ///< 0x0520 + std::array<u8, 8> switchable_interp_prob; ///< 0x052A + std::array<u8, 5> comp_inter_prob; ///< 0x0532 + std::array<u8, 3> skip_probs; ///< 0x0537 + INSERT_PADDING_BYTES_NOINIT(1); ///< 0x053A + std::array<u8, 3> joints; ///< 0x053B + std::array<u8, 2> sign; ///< 0x053E + std::array<u8, 2> class_0; ///< 0x0540 + std::array<u8, 6> fr; ///< 0x0542 + std::array<u8, 2> class_0_hp; ///< 0x0548 + std::array<u8, 2> high_precision; ///< 0x054A + std::array<u8, 20> classes; ///< 0x054C + std::array<u8, 12> class_0_fr; ///< 0x0560 + std::array<u8, 20> pred_bits; ///< 0x056C + std::array<u8, 10> single_ref_prob; ///< 0x0580 + std::array<u8, 5> comp_ref_prob; ///< 0x058A + INSERT_PADDING_BYTES_NOINIT(17); ///< 0x058F + std::array<u8, 2304> coef_probs; ///< 0x05A0 void Convert(Vp9EntropyProbs& fc) { fc.inter_mode_prob = inter_mode_prob; @@ -293,10 +297,45 @@ struct RefPoolElement { }; struct FrameContexts { - s64 from{}; - bool adapted{}; - Vp9EntropyProbs probs{}; + s64 from; + bool adapted; + Vp9EntropyProbs probs; }; +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(partition_prob, 0x0024); +ASSERT_POSITION(switchable_interp_prob, 0x0724); +ASSERT_POSITION(sign, 0x0772); +ASSERT_POSITION(class_0_fr, 0x079E); +ASSERT_POSITION(high_precision, 0x07B2); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(PictureInfo, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(bitstream_size, 0x30); +ASSERT_POSITION(last_frame_size, 0x48); +ASSERT_POSITION(first_level, 0x70); +ASSERT_POSITION(segmentation, 0x80); +ASSERT_POSITION(loop_filter, 0xE4); +ASSERT_POSITION(surface_params, 0xF0); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(EntropyProbs, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(inter_mode_prob, 0x400); +ASSERT_POSITION(tx_8x8_prob, 0x470); +ASSERT_POSITION(partition_prob, 0x4E0); +ASSERT_POSITION(class_0, 0x540); +ASSERT_POSITION(class_0_fr, 0x560); +ASSERT_POSITION(coef_probs, 0x5A0); +#undef ASSERT_POSITION + }; // namespace Decoder }; // namespace Tegra diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp index e4f919afd..b5e3b70fc 100644 --- a/src/video_core/command_classes/nvdec.cpp +++ b/src/video_core/command_classes/nvdec.cpp @@ -8,22 +8,21 @@ namespace Tegra { -Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {} +#define NVDEC_REG_INDEX(field_name) \ + (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) + +Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {} Nvdec::~Nvdec() = default; -void Nvdec::ProcessMethod(Method method, u32 argument) { - if (method == Method::SetVideoCodec) { - codec->StateWrite(static_cast<u32>(method), argument); - } else { - codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8); - } +void Nvdec::ProcessMethod(u32 method, u32 argument) { + state.reg_array[method] = static_cast<u64>(argument) << 8; switch (method) { - case Method::SetVideoCodec: + case NVDEC_REG_INDEX(set_codec_id): codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument)); break; - case Method::Execute: + case NVDEC_REG_INDEX(execute): Execute(); break; } diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h index e66be80b8..6e1da0b04 100644 --- a/src/video_core/command_classes/nvdec.h +++ b/src/video_core/command_classes/nvdec.h @@ -14,16 +14,11 @@ class GPU; class Nvdec { public: - enum class Method : u32 { - SetVideoCodec = 0x80, - Execute = 0xc0, - }; - explicit Nvdec(GPU& gpu); ~Nvdec(); /// Writes the method into the state, Invoke Execute() if encountered - void ProcessMethod(Method method, u32 argument); + void ProcessMethod(u32 method, u32 argument); /// Return most recently decoded frame [[nodiscard]] AVFramePtr GetFrame(); @@ -33,6 +28,7 @@ private: void Execute(); GPU& gpu; + NvdecCommon::NvdecRegisters state; std::unique_ptr<Codec> codec; }; } // namespace Tegra diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h index 01b5e086d..6a24e00a0 100644 --- a/src/video_core/command_classes/nvdec_common.h +++ b/src/video_core/command_classes/nvdec_common.h @@ -4,40 +4,13 @@ #pragma once +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" namespace Tegra::NvdecCommon { -struct NvdecRegisters { - INSERT_PADDING_WORDS(256); - u64 set_codec_id{}; - INSERT_PADDING_WORDS(254); - u64 set_platform_id{}; - u64 picture_info_offset{}; - u64 frame_bitstream_offset{}; - u64 frame_number{}; - u64 h264_slice_data_offsets{}; - u64 h264_mv_dump_offset{}; - INSERT_PADDING_WORDS(6); - u64 frame_stats_offset{}; - u64 h264_last_surface_luma_offset{}; - u64 h264_last_surface_chroma_offset{}; - std::array<u64, 17> surface_luma_offset{}; - std::array<u64, 17> surface_chroma_offset{}; - INSERT_PADDING_WORDS(132); - u64 vp9_entropy_probs_offset{}; - u64 vp9_backward_updates_offset{}; - u64 vp9_last_frame_segmap_offset{}; - u64 vp9_curr_frame_segmap_offset{}; - INSERT_PADDING_WORDS(2); - u64 vp9_last_frame_mvs_offset{}; - u64 vp9_curr_frame_mvs_offset{}; - INSERT_PADDING_WORDS(2); -}; -static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); - -enum class VideoCodec : u32 { +enum class VideoCodec : u64 { None = 0x0, H264 = 0x3, Vp8 = 0x5, @@ -45,4 +18,76 @@ enum class VideoCodec : u32 { Vp9 = 0x9, }; +// NVDEC should use a 32-bit address space, but is mapped to 64-bit, +// doubling the sizes here is compensating for that. +struct NvdecRegisters { + static constexpr std::size_t NUM_REGS = 0x178; + + union { + struct { + INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000 + VideoCodec set_codec_id; ///< 0x0400 + INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408 + u64 execute; ///< 0x0600 + INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608 + struct { ///< 0x0800 + union { + BitField<0, 3, VideoCodec> codec; + BitField<4, 1, u64> gp_timer_on; + BitField<13, 1, u64> mb_timer_on; + BitField<14, 1, u64> intra_frame_pslc; + BitField<17, 1, u64> all_intra_frame; + }; + } control_params; + u64 picture_info_offset; ///< 0x0808 + u64 frame_bitstream_offset; ///< 0x0810 + u64 frame_number; ///< 0x0818 + u64 h264_slice_data_offsets; ///< 0x0820 + u64 h264_mv_dump_offset; ///< 0x0828 + INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830 + u64 frame_stats_offset; ///< 0x0848 + u64 h264_last_surface_luma_offset; ///< 0x0850 + u64 h264_last_surface_chroma_offset; ///< 0x0858 + std::array<u64, 17> surface_luma_offset; ///< 0x0860 + std::array<u64, 17> surface_chroma_offset; ///< 0x08E8 + INSERT_PADDING_WORDS_NOINIT(132); ///< 0x0970 + u64 vp9_entropy_probs_offset; ///< 0x0B80 + u64 vp9_backward_updates_offset; ///< 0x0B88 + u64 vp9_last_frame_segmap_offset; ///< 0x0B90 + u64 vp9_curr_frame_segmap_offset; ///< 0x0B98 + INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0 + u64 vp9_last_frame_mvs_offset; ///< 0x0BA8 + u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0 + INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8 + }; + std::array<u64, NUM_REGS> reg_array; + }; +}; +static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); + +#define ASSERT_REG_POSITION(field_name, position) \ + static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64), \ + "Field " #field_name " has invalid position") + +ASSERT_REG_POSITION(set_codec_id, 0x80); +ASSERT_REG_POSITION(execute, 0xC0); +ASSERT_REG_POSITION(control_params, 0x100); +ASSERT_REG_POSITION(picture_info_offset, 0x101); +ASSERT_REG_POSITION(frame_bitstream_offset, 0x102); +ASSERT_REG_POSITION(frame_number, 0x103); +ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104); +ASSERT_REG_POSITION(frame_stats_offset, 0x109); +ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A); +ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B); +ASSERT_REG_POSITION(surface_luma_offset, 0x10C); +ASSERT_REG_POSITION(surface_chroma_offset, 0x11D); +ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170); +ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171); +ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172); +ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173); +ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175); +ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176); + +#undef ASSERT_REG_POSITION + } // namespace Tegra::NvdecCommon diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 0a8b82f2b..ff3db0aee 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -3,7 +3,21 @@ // Refer to the license.txt file included. #include <array> + +extern "C" { +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif +#include <libswscale/swscale.h> +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif +} + #include "common/assert.h" +#include "common/logging/log.h" + #include "video_core/command_classes/nvdec.h" #include "video_core/command_classes/vic.h" #include "video_core/engines/maxwell_3d.h" @@ -11,10 +25,6 @@ #include "video_core/memory_manager.h" #include "video_core/textures/decoders.h" -extern "C" { -#include <libswscale/swscale.h> -} - namespace Tegra { Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_) diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 0f640fdae..f26530ede 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -7,6 +7,10 @@ #include "video_core/engines/fermi_2d.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" + +using VideoCore::Surface::BytesPerBlock; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; namespace Tegra::Engines { @@ -49,7 +53,7 @@ void Fermi2D::Blit() { UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); const auto& args = regs.pixels_from_memory; - const Config config{ + Config config{ .operation = regs.operation, .filter = args.sample_mode.filter, .dst_x0 = args.dst_x0, @@ -61,7 +65,21 @@ void Fermi2D::Blit() { .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32), .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32), }; - if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) { + Surface src = regs.src; + const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); + const auto need_align_to_pitch = + src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch && + static_cast<s32>(src.width) == config.src_x1 && + config.src_x1 > static_cast<s32>(src.pitch / bytes_per_pixel) && config.src_x0 > 0; + if (need_align_to_pitch) { + auto address = src.Address() + config.src_x0 * bytes_per_pixel; + src.addr_upper = static_cast<u32>(address >> 32); + src.addr_lower = static_cast<u32>(address); + src.width -= config.src_x0; + config.src_x1 -= config.src_x0; + config.src_x0 = 0; + } + if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { UNIMPLEMENTED(); } } diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 2208e1922..c9cff7450 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -18,7 +18,10 @@ set(SHADER_FILES vulkan_uint8.comp ) -find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) +find_program(GLSLANGVALIDATOR "glslangValidator") +if ("${GLSLANGVALIDATOR}" STREQUAL "GLSLANGVALIDATOR-NOTFOUND") + message(FATAL_ERROR "Required program `glslangValidator` not found.") +endif() set(GLSL_FLAGS "") set(QUIET_FLAG "--quiet") diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 7124c755c..d2b9d5f2b 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -69,11 +69,16 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } else { UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); } - // Flush and invalidate through the GPU interface, to be asynchronous if possible. - const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - rasterizer->UnmapMemory(*cpu_addr, size); + const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); + + for (const auto& map : submapped_ranges) { + // Flush and invalidate through the GPU interface, to be asynchronous if possible. + const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map.first); + ASSERT(cpu_addr); + + rasterizer->UnmapMemory(*cpu_addr, map.second); + } UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); } @@ -127,8 +132,14 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s //// Lock the new page // TryLockPage(page_entry, size); + auto& current_page = page_table[PageEntryIndex(gpu_addr)]; - page_table[PageEntryIndex(gpu_addr)] = page_entry; + if ((!current_page.IsValid() && page_entry.IsValid()) || + current_page.ToAddress() != page_entry.ToAddress()) { + rasterizer->ModifyGPUMemory(gpu_addr, size); + } + + current_page = page_entry; } std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align, @@ -174,6 +185,19 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { return page_entry.ToAddress() + (gpu_addr & page_mask); } +std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { + size_t page_index{addr >> page_bits}; + const size_t page_last{(addr + size + page_size - 1) >> page_bits}; + while (page_index < page_last) { + const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; + if (page_addr && *page_addr != 0) { + return page_addr; + } + ++page_index; + } + return std::nullopt; +} + template <typename T> T MemoryManager::Read(GPUVAddr addr) const { if (auto page_pointer{GetPointer(addr)}; page_pointer) { @@ -370,4 +394,79 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { return page <= Core::Memory::PAGE_SIZE; } +bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const { + size_t page_index{gpu_addr >> page_bits}; + const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits}; + std::optional<VAddr> old_page_addr{}; + while (page_index != page_last) { + const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; + if (!page_addr || *page_addr == 0) { + return false; + } + if (old_page_addr) { + if (*old_page_addr + page_size != *page_addr) { + return false; + } + } + old_page_addr = page_addr; + ++page_index; + } + return true; +} + +bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const { + size_t page_index{gpu_addr >> page_bits}; + const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits}; + while (page_index < page_last) { + if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) { + return false; + } + ++page_index; + } + return true; +} + +std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( + GPUVAddr gpu_addr, std::size_t size) const { + std::vector<std::pair<GPUVAddr, std::size_t>> result{}; + size_t page_index{gpu_addr >> page_bits}; + size_t remaining_size{size}; + size_t page_offset{gpu_addr & page_mask}; + std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{}; + std::optional<VAddr> old_page_addr{}; + const auto extend_size = [this, &last_segment, &page_index](std::size_t bytes) { + if (!last_segment) { + GPUVAddr new_base_addr = page_index << page_bits; + last_segment = {new_base_addr, bytes}; + } else { + last_segment->second += bytes; + } + }; + const auto split = [this, &last_segment, &result] { + if (last_segment) { + result.push_back(*last_segment); + last_segment = std::nullopt; + } + }; + while (remaining_size > 0) { + const size_t num_bytes{std::min(page_size - page_offset, remaining_size)}; + const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; + if (!page_addr) { + split(); + } else if (old_page_addr) { + if (*old_page_addr + page_size != *page_addr) { + split(); + } + extend_size(num_bytes); + } else { + extend_size(num_bytes); + } + ++page_index; + page_offset = 0; + remaining_size -= num_bytes; + } + split(); + return result; +} + } // namespace Tegra diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index b3538d503..99d13e7f6 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -76,6 +76,8 @@ public: [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; + [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const; + template <typename T> [[nodiscard]] T Read(GPUVAddr addr) const; @@ -112,10 +114,28 @@ public: void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); /** - * IsGranularRange checks if a gpu region can be simply read with a pointer. + * Checks if a gpu region can be simply read with a pointer. */ [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; + /** + * Checks if a gpu region is mapped by a single range of cpu addresses. + */ + [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const; + + /** + * Checks if a gpu region is mapped entirely. + */ + [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const; + + /** + * Returns a vector with all the subranges of cpu addresses mapped beneath. + * if the region is continous, a single pair will be returned. If it's unmapped, an empty vector + * will be returned; + */ + std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, + std::size_t size) const; + [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 07939432f..0cec4225b 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -87,6 +87,9 @@ public: /// Unmap memory range virtual void UnmapMemory(VAddr addr, u64 size) = 0; + /// Remap GPU memory range. This means underneath backing memory changed + virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0; + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 320ee8d30..63d8ad42a 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -42,6 +42,8 @@ public: [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0; + [[nodiscard]] virtual std::string GetDeviceVendor() const = 0; + // Getter/setter functions: // ------------------------ diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3f4532ca7..3b00614e7 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -202,13 +202,13 @@ Device::Device() { LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available"); throw std::runtime_error{"Insufficient version"}; } - const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); + vendor_name = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); const std::vector extensions = GetExtensions(); - const bool is_nvidia = vendor == "NVIDIA Corporation"; - const bool is_amd = vendor == "ATI Technologies Inc."; - const bool is_intel = vendor == "Intel"; + const bool is_nvidia = vendor_name == "NVIDIA Corporation"; + const bool is_amd = vendor_name == "ATI Technologies Inc."; + const bool is_intel = vendor_name == "Intel"; #ifdef __unix__ const bool is_linux = true; @@ -275,6 +275,56 @@ Device::Device() { } } +std::string Device::GetVendorName() const { + if (vendor_name == "NVIDIA Corporation") { + return "NVIDIA"; + } + if (vendor_name == "ATI Technologies Inc.") { + return "AMD"; + } + if (vendor_name == "Intel") { + // For Mesa, `Intel` is an overloaded vendor string that could mean crocus or iris. + // Simply return `INTEL` for those as well as the Windows driver. + return "INTEL"; + } + if (vendor_name == "Intel Open Source Technology Center") { + return "I965"; + } + if (vendor_name == "Mesa Project") { + return "I915"; + } + if (vendor_name == "Mesa/X.org") { + // This vendor string is overloaded between llvmpipe, softpipe, and virgl, so just return + // MESA instead of one of those driver names. + return "MESA"; + } + if (vendor_name == "AMD") { + return "RADEONSI"; + } + if (vendor_name == "nouveau") { + return "NOUVEAU"; + } + if (vendor_name == "X.Org") { + return "R600"; + } + if (vendor_name == "Collabora Ltd") { + return "ZINK"; + } + if (vendor_name == "Intel Corporation") { + return "OPENSWR"; + } + if (vendor_name == "Microsoft Corporation") { + return "D3D12"; + } + if (vendor_name == "NVIDIA") { + // Mesa's tegra driver reports `NVIDIA`. Only present in this list because the default + // strategy would have returned `NVIDIA` here for this driver, the same result as the + // proprietary driver. + return "TEGRA"; + } + return vendor_name; +} + Device::Device(std::nullptr_t) { max_uniform_buffers.fill(std::numeric_limits<u32>::max()); uniform_buffer_alignment = 4; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index f24bd0c7b..2c2b13767 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -22,6 +22,8 @@ public: explicit Device(); explicit Device(std::nullptr_t); + [[nodiscard]] std::string GetVendorName() const; + u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; } @@ -130,6 +132,7 @@ private: static bool TestVariableAoffi(); static bool TestPreciseBug(); + std::string vendor_name; std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; size_t uniform_buffer_alignment{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index eb8bdaa85..07ad0e205 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -611,6 +611,13 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { shader_cache.OnCPUWrite(addr, size); } +void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) { + { + std::scoped_lock lock{texture_cache.mutex}; + texture_cache.UnmapGPUMemory(addr, size); + } +} + void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { if (!gpu.IsAsync()) { gpu_memory.Write<u32>(addr, value); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9995a563b..482efed7a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -80,6 +80,7 @@ public: void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; void UnmapMemory(VAddr addr, u64 size) override; + void ModifyGPUMemory(GPUVAddr addr, u64 size) override; void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; void ReleaseFences() override; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 23948feed..ff0f03e99 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -327,7 +327,8 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 if (format_info.is_compressed) { return false; } - if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) { + if (std::ranges::find(ACCELERATED_FORMATS, static_cast<int>(internal_format)) == + ACCELERATED_FORMATS.end()) { return false; } if (format_info.compatibility_by_size) { @@ -341,6 +342,20 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 [[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset, VideoCommon::SubresourceLayers subresource, GLenum target) { switch (target) { + case GL_TEXTURE_1D: + return CopyOrigin{ + .level = static_cast<GLint>(subresource.base_level), + .x = static_cast<GLint>(offset.x), + .y = static_cast<GLint>(0), + .z = static_cast<GLint>(0), + }; + case GL_TEXTURE_1D_ARRAY: + return CopyOrigin{ + .level = static_cast<GLint>(subresource.base_level), + .x = static_cast<GLint>(offset.x), + .y = static_cast<GLint>(0), + .z = static_cast<GLint>(subresource.base_layer), + }; case GL_TEXTURE_2D_ARRAY: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: return CopyOrigin{ @@ -366,6 +381,18 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 VideoCommon::SubresourceLayers dst_subresource, GLenum target) { switch (target) { + case GL_TEXTURE_1D: + return CopyRegion{ + .width = static_cast<GLsizei>(extent.width), + .height = static_cast<GLsizei>(1), + .depth = static_cast<GLsizei>(1), + }; + case GL_TEXTURE_1D_ARRAY: + return CopyRegion{ + .width = static_cast<GLsizei>(extent.width), + .height = static_cast<GLsizei>(1), + .depth = static_cast<GLsizei>(dst_subresource.num_layers), + }; case GL_TEXTURE_2D_ARRAY: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: return CopyRegion{ diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index cc19a110f..0b66f8332 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -70,6 +70,10 @@ public: return &rasterizer; } + [[nodiscard]] std::string GetDeviceVendor() const override { + return device.GetVendorName(); + } + private: /// Initializes the OpenGL state and creates persistent objects. void InitOpenGLObjects(); diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index abaf1ee6a..8fb5be393 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -261,9 +261,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z); glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z); glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(), - copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); + copy.src_subresource.base_level, GL_TRUE, 0, GL_READ_ONLY, GL_RG32UI); glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(), - copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); + copy.dst_subresource.base_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8UI); glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); } program_manager.RestoreGuestCompute(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 72071316c..d7d17e110 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -47,6 +47,10 @@ public: return &rasterizer; } + [[nodiscard]] std::string GetDeviceVendor() const override { + return device.GetDriverName(); + } + private: void Report() const; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 8cb65e588..0df4e1a1c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -55,8 +55,9 @@ size_t BytesPerIndex(VkIndexType index_type) { template <typename T> std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) { std::array<T, 6> indices{0, 1, 2, 0, 2, 3}; - std::ranges::transform(indices, indices.begin(), - [quad, first](u32 index) { return first + index + quad * 4; }); + for (T& index : indices) { + index = static_cast<T>(first + index + quad * 4); + } return indices; } } // Anonymous namespace diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 1c9120170..bd4d649cc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -557,6 +557,13 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { pipeline_cache.OnCPUWrite(addr, size); } +void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) { + { + std::scoped_lock lock{texture_cache.mutex}; + texture_cache.UnmapGPUMemory(addr, size); + } +} + void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { if (!gpu.IsAsync()) { gpu_memory.Write<u32>(addr, value); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index cb8c5c279..41459c5c5 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -72,6 +72,7 @@ public: void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; void UnmapMemory(VAddr addr, u64 size) override; + void ModifyGPUMemory(GPUVAddr addr, u64 size) override; void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; void ReleaseFences() override; diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index ad69d32d1..6052d148a 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -69,6 +69,9 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_ } } +ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_) + : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {} + std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept { if (other_addr < gpu_addr) { // Subresource address can't be lower than the base @@ -82,7 +85,7 @@ std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const if (info.type != ImageType::e3D) { const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride); const auto end = mip_level_offsets.begin() + info.resources.levels; - const auto it = std::find(mip_level_offsets.begin(), end, mip_offset); + const auto it = std::find(mip_level_offsets.begin(), end, static_cast<u32>(mip_offset)); if (layer > info.resources.layers || it == end) { return std::nullopt; } diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index e326cab71..ff1feda9b 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -25,12 +25,14 @@ enum class ImageFlagBits : u32 { Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked + Remapped = 1 << 8, ///< Image has been remapped. + Sparse = 1 << 9, ///< Image has non continous submemory. // Garbage Collection Flags - BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher - ///< garbage collection priority - Alias = 1 << 9, ///< This image has aliases and has priority on garbage - ///< collection + BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher + ///< garbage collection priority + Alias = 1 << 11, ///< This image has aliases and has priority on garbage + ///< collection }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) @@ -57,6 +59,12 @@ struct ImageBase { return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; } + [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept { + const VAddr overlap_end = overlap_gpu_addr + overlap_size; + const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes; + return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end; + } + void CheckBadOverlapState(); void CheckAliasState(); @@ -84,6 +92,29 @@ struct ImageBase { std::vector<AliasedImage> aliased_images; std::vector<ImageId> overlapping_images; + ImageMapId map_view_id{}; +}; + +struct ImageMapView { + explicit ImageMapView(GPUVAddr gpu_addr, VAddr cpu_addr, size_t size, ImageId image_id); + + [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { + const VAddr overlap_end = overlap_cpu_addr + overlap_size; + const VAddr cpu_addr_end = cpu_addr + size; + return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; + } + + [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept { + const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size; + const GPUVAddr gpu_addr_end = gpu_addr + size; + return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end; + } + + GPUVAddr gpu_addr; + VAddr cpu_addr; + size_t size; + ImageId image_id; + bool picked{}; }; struct ImageAllocBase { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 84530a179..01de2d498 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -13,6 +13,7 @@ #include <span> #include <type_traits> #include <unordered_map> +#include <unordered_set> #include <utility> #include <vector> @@ -110,9 +111,6 @@ public: /// Notify the cache that a new frame has been queued void TickFrame(); - /// Runs the Garbage Collector. - void RunGarbageCollector(); - /// Return a constant reference to the given image view id [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; @@ -155,12 +153,13 @@ public: /// Remove images in a region void UnmapMemory(VAddr cpu_addr, size_t size); + /// Remove images in a region + void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); + /// Blit an image with the given parameters void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, - const Tegra::Engines::Fermi2D::Config& copy, - std::optional<Region2D> src_region_override = {}, - std::optional<Region2D> dst_region_override = {}); + const Tegra::Engines::Fermi2D::Config& copy); /// Invalidate the contents of the color buffer index /// These contents become unspecified, the cache can assume aggressive optimizations. @@ -193,7 +192,22 @@ public: private: /// Iterate over all page indices in a range template <typename Func> - static void ForEachPage(VAddr addr, size_t size, Func&& func) { + static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; + const u64 page_end = (addr + size - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { + break; + } + } else { + func(page); + } + } + } + + template <typename Func> + static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; const u64 page_end = (addr + size - 1) >> PAGE_BITS; for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { @@ -207,6 +221,9 @@ private: } } + /// Runs the Garbage Collector. + void RunGarbageCollector(); + /// Fills image_view_ids in the image views in indices void FillImageViews(DescriptorTable<TICEntry>& table, std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, @@ -220,7 +237,7 @@ private: FramebufferId GetFramebufferId(const RenderTargets& key); /// Refresh the contents (pixel data) of an image - void RefreshContents(Image& image); + void RefreshContents(Image& image, ImageId image_id); /// Upload data from guest to an image template <typename StagingBuffer> @@ -269,6 +286,16 @@ private: template <typename Func> void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); + template <typename Func> + void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); + + template <typename Func> + void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); + + /// Iterates over all the images in a region calling func + template <typename Func> + void ForEachSparseSegment(ImageBase& image, Func&& func); + /// Find or create an image view in the given image with the passed parameters [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); @@ -279,10 +306,10 @@ private: void UnregisterImage(ImageId image); /// Track CPU reads and writes for image - void TrackImage(ImageBase& image); + void TrackImage(ImageBase& image, ImageId image_id); /// Stop tracking CPU reads and writes for image - void UntrackImage(ImageBase& image); + void UntrackImage(ImageBase& image, ImageId image_id); /// Delete image from the cache void DeleteImage(ImageId image); @@ -340,7 +367,13 @@ private: std::unordered_map<TSCEntry, SamplerId> samplers; std::unordered_map<RenderTargets, FramebufferId> framebuffers; - std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; + std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table; + std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table; + std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table; + + std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; + + VAddr virtual_invalid_space{}; bool has_deleted_images = false; u64 total_used_memory = 0; @@ -349,6 +382,7 @@ private: u64 critical_memory; SlotVector<Image> slot_images; + SlotVector<ImageMapView> slot_map_views; SlotVector<ImageView> slot_image_views; SlotVector<ImageAlloc> slot_image_allocs; SlotVector<Sampler> slot_samplers; @@ -459,7 +493,7 @@ void TextureCache<P>::RunGarbageCollector() { } } if (True(image->flags & ImageFlagBits::Tracked)) { - UntrackImage(*image); + UntrackImage(*image, image_id); } UnregisterImage(image_id); DeleteImage(image_id); @@ -658,7 +692,9 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { return; } image.flags |= ImageFlagBits::CpuModified; - UntrackImage(image); + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } }); } @@ -695,7 +731,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { for (const ImageId id : deleted_images) { Image& image = slot_images[id]; if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image); + UntrackImage(image, id); } UnregisterImage(id); DeleteImage(id); @@ -703,11 +739,26 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { } template <class P> +void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { + std::vector<ImageId> deleted_images; + ForEachImageInRegionGPU(gpu_addr, size, + [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Remapped)) { + continue; + } + image.flags |= ImageFlagBits::Remapped; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); + } + } +} + +template <class P> void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, - const Tegra::Engines::Fermi2D::Config& copy, - std::optional<Region2D> src_override, - std::optional<Region2D> dst_override) { + const Tegra::Engines::Fermi2D::Config& copy) { const BlitImages images = GetBlitImages(dst, src); const ImageId dst_id = images.dst_id; const ImageId src_id = images.src_id; @@ -718,47 +769,25 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const ImageBase& src_image = slot_images[src_id]; // TODO: Deduplicate - const std::optional dst_base = dst_image.TryFindBase(dst.Address()); - const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; - const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); - const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); - const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); - - // out of bounds texture blit checking - const bool use_override = src_override.has_value(); - const s32 src_x0 = copy.src_x0 >> src_samples_x; - s32 src_x1 = use_override ? src_override->end.x : copy.src_x1 >> src_samples_x; - const s32 src_y0 = copy.src_y0 >> src_samples_y; - const s32 src_y1 = copy.src_y1 >> src_samples_y; - - const auto src_width = static_cast<s32>(src_image.info.size.width); - const bool width_oob = src_x1 > src_width; - const auto width_diff = width_oob ? src_x1 - src_width : 0; - if (width_oob) { - src_x1 = src_width; - } - - const Region2D src_dimensions{ - Offset2D{.x = src_x0, .y = src_y0}, - Offset2D{.x = src_x1, .y = src_y1}, - }; - const auto src_region = use_override ? *src_override : src_dimensions; - const std::optional src_base = src_image.TryFindBase(src.Address()); const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); - const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); + const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); + const Region2D src_region{ + Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, + Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, + }; - const s32 dst_x0 = copy.dst_x0 >> dst_samples_x; - const s32 dst_x1 = copy.dst_x1 >> dst_samples_x; - const s32 dst_y0 = copy.dst_y0 >> dst_samples_y; - const s32 dst_y1 = copy.dst_y1 >> dst_samples_y; - const Region2D dst_dimensions{ - Offset2D{.x = dst_x0, .y = dst_y0}, - Offset2D{.x = dst_x1 - width_diff, .y = dst_y1}, + const std::optional dst_base = dst_image.TryFindBase(dst.Address()); + const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); + const Region2D dst_region{ + Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, + Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, }; - const auto dst_region = use_override ? *dst_override : dst_dimensions; // Always call this after src_framebuffer_id was queried, as the address might be invalidated. Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; @@ -775,21 +804,6 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, copy.operation); } - - if (width_oob) { - // Continue copy of the oob region of the texture on the next row - auto oob_src = src; - oob_src.height++; - const Region2D src_region_override{ - Offset2D{.x = 0, .y = src_y0 + 1}, - Offset2D{.x = width_diff, .y = src_y1 + 1}, - }; - const Region2D dst_region_override{ - Offset2D{.x = dst_x1 - width_diff, .y = dst_y0}, - Offset2D{.x = dst_x1, .y = dst_y1}, - }; - BlitImage(dst, oob_src, copy, src_region_override, dst_region_override); - } } template <class P> @@ -833,9 +847,10 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_ad if (it == page_table.end()) { return nullptr; } - const auto& image_ids = it->second; - for (const ImageId image_id : image_ids) { - const ImageBase& image = slot_images[image_id]; + const auto& image_map_ids = it->second; + for (const ImageMapId map_id : image_map_ids) { + const ImageMapView& map = slot_map_views[map_id]; + const ImageBase& image = slot_images[map.image_id]; if (image.cpu_addr != cpu_addr) { continue; } @@ -915,13 +930,13 @@ bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { } template <class P> -void TextureCache<P>::RefreshContents(Image& image) { +void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { if (False(image.flags & ImageFlagBits::CpuModified)) { // Only upload modified images return; } image.flags &= ~ImageFlagBits::CpuModified; - TrackImage(image); + TrackImage(image, image_id); if (image.info.num_samples > 1) { LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); @@ -958,7 +973,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) template <class P> ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { - if (!IsValidAddress(gpu_memory, config)) { + if (!IsValidEntry(gpu_memory, config)) { return NULL_IMAGE_VIEW_ID; } const auto [pair, is_new] = image_views.try_emplace(config); @@ -1000,14 +1015,20 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a template <class P> ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { - return ImageId{}; + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); + if (!cpu_addr) { + return ImageId{}; + } } const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool native_bgr = runtime.HasNativeBgr(); ImageId image_id; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { + if (True(existing_image.flags & ImageFlagBits::Remapped)) { + return false; + } if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { const bool strict_size = False(options & RelaxedOptions::Size) && True(existing_image.flags & ImageFlagBits::Strong); @@ -1033,7 +1054,16 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, template <class P> ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + const auto size = CalculateGuestSizeInBytes(info); + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); + if (!cpu_addr) { + const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; + virtual_invalid_space += Common::AlignUp(size, 32); + cpu_addr = std::optional<VAddr>(fake_addr); + } + } ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); const Image& image = slot_images[image_id]; @@ -1053,11 +1083,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool native_bgr = runtime.HasNativeBgr(); std::vector<ImageId> overlap_ids; + std::unordered_set<ImageId> overlaps_found; std::vector<ImageId> left_aliased_ids; std::vector<ImageId> right_aliased_ids; + std::unordered_set<ImageId> ignore_textures; std::vector<ImageId> bad_overlap_ids; - ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { - if (info.type != overlap.info.type) { + const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); return; } if (info.type == ImageType::Linear) { @@ -1067,6 +1100,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA } return; } + overlaps_found.insert(overlap_id); static constexpr bool strict_size = true; const std::optional<OverlapResult> solution = ResolveOverlap( new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); @@ -1090,12 +1124,40 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA bad_overlap_ids.push_back(overlap_id); overlap.flags |= ImageFlagBits::BadOverlap; } - }); + }; + ForEachImageInRegion(cpu_addr, size_bytes, region_check); + const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { + if (!overlaps_found.contains(overlap_id)) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); + } + if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { + ignore_textures.insert(overlap_id); + } + } + }; + ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); Image& new_image = slot_images[new_image_id]; + if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { + new_image.flags |= ImageFlagBits::Sparse; + } + + for (const ImageId overlap_id : ignore_textures) { + Image& overlap = slot_images[overlap_id]; + if (True(overlap.flags & ImageFlagBits::GpuModified)) { + UNIMPLEMENTED(); + } + if (True(overlap.flags & ImageFlagBits::Tracked)) { + UntrackImage(overlap, overlap_id); + } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } + // TODO: Only upload what we need - RefreshContents(new_image); + RefreshContents(new_image, new_image_id); for (const ImageId overlap_id : overlap_ids) { Image& overlap = slot_images[overlap_id]; @@ -1107,7 +1169,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA runtime.CopyImage(new_image, overlap, copies); } if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap); + UntrackImage(overlap, overlap_id); } UnregisterImage(overlap_id); DeleteImage(overlap_id); @@ -1242,7 +1304,8 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; boost::container::small_vector<ImageId, 32> images; - ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { + boost::container::small_vector<ImageMapId, 32> maps; + ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { const auto it = page_table.find(page); if (it == page_table.end()) { if constexpr (BOOL_BREAK) { @@ -1251,12 +1314,105 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f return; } } + for (const ImageMapId map_id : it->second) { + ImageMapView& map = slot_map_views[map_id]; + if (map.picked) { + continue; + } + if (!map.Overlaps(cpu_addr, size)) { + continue; + } + map.picked = true; + maps.push_back(map_id); + Image& image = slot_images[map.image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(map.image_id); + if constexpr (BOOL_BREAK) { + if (func(map.image_id, image)) { + return true; + } + } else { + func(map.image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } + for (const ImageMapId map_id : maps) { + slot_map_views[map_id].picked = false; + } +} + +template <class P> +template <typename Func> +void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; + static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; + boost::container::small_vector<ImageId, 8> images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = gpu_page_table.find(page); + if (it == gpu_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + if (!image.OverlapsGPU(gpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; + } + } else { + func(image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } +} + +template <class P> +template <typename Func> +void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; + static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; + boost::container::small_vector<ImageId, 8> images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = sparse_page_table.find(page); + if (it == sparse_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } for (const ImageId image_id : it->second) { Image& image = slot_images[image_id]; if (True(image.flags & ImageFlagBits::Picked)) { continue; } - if (!image.Overlaps(cpu_addr, size)) { + if (!image.OverlapsGPU(gpu_addr, size)) { continue; } image.flags |= ImageFlagBits::Picked; @@ -1279,6 +1435,27 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f } template <class P> +template <typename Func> +void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { + using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; + static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; + const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); + for (auto& segment : segments) { + const auto gpu_addr = segment.first; + const auto size = segment.second; + std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + if constexpr (RETURNS_BOOL) { + if (func(gpu_addr, *cpu_addr, size)) { + break; + } + } else { + func(gpu_addr, *cpu_addr, size); + } + } +} + +template <class P> ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { Image& image = slot_images[image_id]; if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { @@ -1295,8 +1472,6 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Trying to register an already registered image"); image.flags |= ImageFlagBits::Registered; - ForEachPage(image.cpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { page_table[page].push_back(image_id); }); u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); if ((IsPixelFormatASTC(image.info.format) && True(image.flags & ImageFlagBits::AcceleratedUpload)) || @@ -1304,6 +1479,27 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); + if (False(image.flags & ImageFlagBits::Sparse)) { + auto map_id = + slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + image.map_view_id = map_id; + return; + } + std::vector<ImageViewId> sparse_maps{}; + ForEachSparseSegment( + image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); + ForEachCPUPage(cpu_addr, size, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + sparse_maps.push_back(map_id); + }); + sparse_views.emplace(image_id, std::move(sparse_maps)); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); } template <class P> @@ -1320,34 +1516,125 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory -= Common::AlignUp(tentative_size, 1024); - ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector<ImageId>& image_ids = page_it->second; - const auto vector_it = std::ranges::find(image_ids, image_id); - if (vector_it == image_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS); - return; - } - image_ids.erase(vector_it); + const auto& clear_page_table = + [this, image_id]( + u64 page, + std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) { + const auto page_it = selected_page_table.find(page); + if (page_it == selected_page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector<ImageId>& image_ids = page_it->second; + const auto vector_it = std::ranges::find(image_ids, image_id); + if (vector_it == image_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", + page << PAGE_BITS); + return; + } + image_ids.erase(vector_it); + }; + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); + if (False(image.flags & ImageFlagBits::Sparse)) { + const auto map_id = image.map_view_id; + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector<ImageMapId>& image_map_ids = page_it->second; + const auto vector_it = std::ranges::find(image_map_ids, map_id); + if (vector_it == image_map_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", + page << PAGE_BITS); + return; + } + image_map_ids.erase(vector_it); + }); + slot_map_views.erase(map_id); + return; + } + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { + clear_page_table(page, sparse_page_table); }); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map_range = slot_map_views[map_view_id]; + const VAddr cpu_addr = map_range.cpu_addr; + const std::size_t size = map_range.size; + ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector<ImageMapId>& image_map_ids = page_it->second; + auto vector_it = image_map_ids.begin(); + while (vector_it != image_map_ids.end()) { + ImageMapView& map = slot_map_views[*vector_it]; + if (map.image_id != image_id) { + vector_it++; + continue; + } + if (!map.picked) { + map.picked = true; + } + vector_it = image_map_ids.erase(vector_it); + } + }); + slot_map_views.erase(map_view_id); + } + sparse_views.erase(it); } template <class P> -void TextureCache<P>::TrackImage(ImageBase& image) { +void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { ASSERT(False(image.flags & ImageFlagBits::Tracked)); image.flags |= ImageFlagBits::Tracked; - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); + return; + } + if (True(image.flags & ImageFlagBits::Registered)) { + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + } + return; + } + ForEachSparseSegment(image, + [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + }); } template <class P> -void TextureCache<P>::UntrackImage(ImageBase& image) { +void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { ASSERT(True(image.flags & ImageFlagBits::Tracked)); image.flags &= ~ImageFlagBits::Tracked; - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); + return; + } + ASSERT(True(image.flags & ImageFlagBits::Registered)); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + } } template <class P> @@ -1489,10 +1776,10 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool if (invalidate) { image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); if (False(image.flags & ImageFlagBits::Tracked)) { - TrackImage(image); + TrackImage(image, image_id); } } else { - RefreshContents(image); + RefreshContents(image, image_id); SynchronizeAliases(image_id); } if (is_modification) { diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index c9571f7e4..9fbdc1ac6 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -16,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14; constexpr SlotId CORRUPT_ID{0xfffffffe}; using ImageId = SlotId; +using ImageMapId = SlotId; using ImageViewId = SlotId; using ImageAllocId = SlotId; using SamplerId = SlotId; diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 4efe042b6..c872517b8 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -394,7 +394,7 @@ template <u32 GOB_EXTENT> const s32 mip_offset = diff % layer_stride; const std::array offsets = CalculateMipLevelOffsets(new_info); const auto end = offsets.begin() + new_info.resources.levels; - const auto it = std::find(offsets.begin(), end, mip_offset); + const auto it = std::find(offsets.begin(), end, static_cast<u32>(mip_offset)); if (it == end) { // Mipmap is not aligned to any valid size return std::nullopt; @@ -664,6 +664,16 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept { return offsets; } +LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept { + const u32 num_levels = info.resources.levels; + const LevelInfo level_info = MakeLevelInfo(info); + LevelArray sizes{}; + for (u32 level = 0; level < num_levels; ++level) { + sizes[level] = CalculateLevelSize(level_info, level); + } + return sizes; +} + std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { ASSERT(info.type == ImageType::e3D); std::vector<u32> offsets; @@ -776,14 +786,20 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn return copies; } -bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { - if (config.Address() == 0) { +bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { + const GPUVAddr address = config.Address(); + if (address == 0) { return false; } - if (config.Address() > (u64(1) << 48)) { + if (address > (1ULL << 48)) { return false; } - return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); + if (gpu_memory.GpuToCpuAddress(address).has_value()) { + return true; + } + const ImageInfo info{config}; + const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); + return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); } std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index cdc5cbc75..766502908 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -40,6 +40,8 @@ struct OverlapResult { [[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept; +[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; + [[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); [[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); @@ -55,7 +57,7 @@ struct OverlapResult { const ImageInfo& src, SubresourceBase base); -[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); +[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 7b756ba41..3ab500760 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -1365,8 +1365,8 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, // each partition. // Determine partitions, partition index, and color endpoint modes - s32 planeIdx = -1; - u32 partitionIndex; + u32 planeIdx{UINT32_MAX}; + u32 partitionIndex{}; u32 colorEndpointMode[4] = {0, 0, 0, 0}; // Define color data. diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 23814afd2..f214510da 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -532,6 +532,27 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want return (supported_usage & wanted_usage) == wanted_usage; } +std::string Device::GetDriverName() const { + switch (driver_id) { + case VK_DRIVER_ID_AMD_PROPRIETARY: + return "AMD"; + case VK_DRIVER_ID_AMD_OPEN_SOURCE: + return "AMDVLK"; + case VK_DRIVER_ID_MESA_RADV: + return "RADV"; + case VK_DRIVER_ID_NVIDIA_PROPRIETARY: + return "NVIDIA"; + case VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS: + return "INTEL"; + case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA: + return "ANV"; + case VK_DRIVER_ID_MESA_LLVMPIPE: + return "LAVAPIPE"; + default: + return vendor_name; + } +} + void Device::CheckSuitability(bool requires_swapchain) const { std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; bool has_swapchain = false; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 88b298196..96c0f8c60 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -45,6 +45,9 @@ public: /// Reports a shader to Nsight Aftermath. void SaveShader(const std::vector<u32>& spirv) const; + /// Returns the name of the VkDriverId reported from Vulkan. + std::string GetDriverName() const; + /// Returns the dispatch loader with direct function pointers of the device. const vk::DeviceDispatch& GetDispatchLoader() const { return dld; |