summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/command_classes/codecs
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/command_classes/codecs')
-rw-r--r-- src/video_core/command_classes/codecs/codec.cpp 144
-rw-r--r-- src/video_core/command_classes/codecs/codec.h 4
-rw-r--r-- src/video_core/command_classes/codecs/vp9.cpp 134
-rw-r--r-- src/video_core/command_classes/codecs/vp9.h 14
-rw-r--r-- src/video_core/command_classes/codecs/vp9_types.h 6
5 files changed, 161 insertions, 141 deletions
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index 1b4bbc8ac..f798a0053 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <cstring>
#include <fstream>
#include <vector>
#include "common/assert.h"
@@ -17,10 +16,47 @@ extern "C" {
}
namespace Tegra {
+#if defined(LIBVA_FOUND)
+// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c originally under MIT license
+namespace {
+constexpr std::array<const char*, 2> VAAPI_DRIVERS = {
+ "i915",
+ "amdgpu",
+};
+
+AVPixelFormat GetHwFormat(AVCodecContext*, const AVPixelFormat* pix_fmts) {
+ for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
+ if (*p == AV_PIX_FMT_VAAPI) {
+ return AV_PIX_FMT_VAAPI;
+ }
+ }
+ LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
+ return *pix_fmts;
+}
+
+bool CreateVaapiHwdevice(AVBufferRef** av_hw_device) {
+ AVDictionary* hwdevice_options = nullptr;
+ av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
+ for (const auto& driver : VAAPI_DRIVERS) {
+ av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
+ const int hwdevice_error = av_hwdevice_ctx_create(av_hw_device, AV_HWDEVICE_TYPE_VAAPI,
+ nullptr, hwdevice_options, 0);
+ if (hwdevice_error >= 0) {
+ LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
+ av_dict_free(&hwdevice_options);
+ return true;
+ }
+ LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
+ }
+ LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
+ av_dict_free(&hwdevice_options);
+ return false;
+}
+} // namespace
+#endif
void AVFrameDeleter(AVFrame* ptr) {
- av_frame_unref(ptr);
- av_free(ptr);
+ av_frame_free(&ptr);
}
Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
@@ -32,19 +68,31 @@ Codec::~Codec() {
return;
}
// Free libav memory
- AVFrame* av_frame{nullptr};
avcodec_send_packet(av_codec_ctx, nullptr);
- av_frame = av_frame_alloc();
+ AVFrame* av_frame = av_frame_alloc();
avcodec_receive_frame(av_codec_ctx, av_frame);
avcodec_flush_buffers(av_codec_ctx);
-
- av_frame_unref(av_frame);
- av_free(av_frame);
+ av_frame_free(&av_frame);
avcodec_close(av_codec_ctx);
+ av_buffer_unref(&av_hw_device);
+}
+
+void Codec::InitializeHwdec() {
+ // Prioritize integrated GPU to mitigate bandwidth bottlenecks
+#if defined(LIBVA_FOUND)
+ if (CreateVaapiHwdevice(&av_hw_device)) {
+ const auto hw_device_ctx = av_buffer_ref(av_hw_device);
+ ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
+ av_codec_ctx->hw_device_ctx = hw_device_ctx;
+ av_codec_ctx->get_format = GetHwFormat;
+ return;
+ }
+#endif
+ // TODO more GPU accelerated decoders
}
void Codec::Initialize() {
- AVCodecID codec{AV_CODEC_ID_NONE};
+ AVCodecID codec;
switch (current_codec) {
case NvdecCommon::VideoCodec::H264:
codec = AV_CODEC_ID_H264;
@@ -53,22 +101,24 @@ void Codec::Initialize() {
codec = AV_CODEC_ID_VP9;
break;
default:
+ UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
return;
}
av_codec = avcodec_find_decoder(codec);
av_codec_ctx = avcodec_alloc_context3(av_codec);
av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
-
- // TODO(ameerj): libavcodec gpu hw acceleration
-
+ InitializeHwdec();
+ if (!av_codec_ctx->hw_device_ctx) {
+ LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
+ }
const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
if (av_error < 0) {
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
avcodec_close(av_codec_ctx);
+ av_buffer_unref(&av_hw_device);
return;
}
initialized = true;
- return;
}
void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
@@ -80,36 +130,64 @@ void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
void Codec::Decode() {
const bool is_first_frame = !initialized;
- if (!initialized) {
+ if (is_first_frame) {
Initialize();
}
-
bool vp9_hidden_frame = false;
- AVPacket packet{};
- av_init_packet(&packet);
std::vector<u8> frame_data;
-
if (current_codec == NvdecCommon::VideoCodec::H264) {
frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame);
} else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
frame_data = vp9_decoder->ComposeFrameHeader(state);
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
}
-
+ AVPacket packet{};
+ av_init_packet(&packet);
packet.data = frame_data.data();
packet.size = static_cast<s32>(frame_data.size());
-
- avcodec_send_packet(av_codec_ctx, &packet);
-
- if (!vp9_hidden_frame) {
- // Only receive/store visible frames
- AVFramePtr frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
- avcodec_receive_frame(av_codec_ctx, frame.get());
- av_frames.push(std::move(frame));
- // Limit queue to 10 frames. Workaround for ZLA decode and queue spam
- if (av_frames.size() > 10) {
- av_frames.pop();
- }
+ if (const int ret = avcodec_send_packet(av_codec_ctx, &packet); ret) {
+ LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", ret);
+ return;
+ }
+ // Only receive/store visible frames
+ if (vp9_hidden_frame) {
+ return;
+ }
+ AVFrame* hw_frame = av_frame_alloc();
+ AVFrame* sw_frame = hw_frame;
+ ASSERT_MSG(hw_frame, "av_frame_alloc hw_frame failed");
+ if (const int ret = avcodec_receive_frame(av_codec_ctx, hw_frame); ret) {
+ LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
+ av_frame_free(&hw_frame);
+ return;
+ }
+ if (!hw_frame->width || !hw_frame->height) {
+ LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
+ av_frame_free(&hw_frame);
+ return;
+ }
+#if defined(LIBVA_FOUND)
+ // Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c under MIT license
+ if (hw_frame->format == AV_PIX_FMT_VAAPI) {
+ sw_frame = av_frame_alloc();
+ ASSERT_MSG(sw_frame, "av_frame_alloc sw_frame failed");
+ // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
+ // because Intel drivers crash unless using AV_PIX_FMT_NV12
+ sw_frame->format = AV_PIX_FMT_NV12;
+ const int transfer_data_ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0);
+ ASSERT_MSG(!transfer_data_ret, "av_hwframe_transfer_data error {}", transfer_data_ret);
+ av_frame_free(&hw_frame);
+ }
+#endif
+ if (sw_frame->format != AV_PIX_FMT_YUV420P && sw_frame->format != AV_PIX_FMT_NV12) {
+ UNIMPLEMENTED_MSG("Unexpected video format from host graphics: {}", sw_frame->format);
+ av_frame_free(&sw_frame);
+ return;
+ }
+ av_frames.push(AVFramePtr{sw_frame, AVFrameDeleter});
+ if (av_frames.size() > 10) {
+ LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
+ av_frames.pop();
}
}
@@ -119,7 +197,6 @@ AVFramePtr Codec::GetCurrentFrame() {
if (av_frames.empty()) {
return AVFramePtr{nullptr, AVFrameDeleter};
}
-
AVFramePtr frame = std::move(av_frames.front());
av_frames.pop();
return frame;
@@ -144,6 +221,5 @@ std::string_view Codec::GetCurrentCodecName() const {
default:
return "Unknown";
}
-};
-
+}
} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index 96c823c76..71936203f 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -22,7 +22,6 @@ extern "C" {
namespace Tegra {
class GPU;
-struct VicRegisters;
void AVFrameDeleter(AVFrame* ptr);
using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>;
@@ -55,10 +54,13 @@ public:
[[nodiscard]] std::string_view GetCurrentCodecName() const;
private:
+ void InitializeHwdec();
+
bool initialized{};
NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
AVCodec* av_codec{nullptr};
+ AVBufferRef* av_hw_device{nullptr};
AVCodecContext* av_codec_ctx{nullptr};
GPU& gpu;
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
index 902bc2a98..7eecb3991 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -11,6 +11,9 @@
namespace Tegra::Decoder {
namespace {
+constexpr u32 diff_update_probability = 252;
+constexpr u32 frame_sync_code = 0x498342;
+
// Default compressed header probabilities once frame context resets
constexpr Vp9EntropyProbs default_probs{
.y_mode_prob{
@@ -361,8 +364,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state)
InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
// surface_luma_offset[0:3] contains the address of the reference frame offsets in the following
- // order: last, golden, altref, current. It may be worthwhile to track the updates done here
- // to avoid buffering frame data needed for reference frame updating in the header composition.
+ // order: last, golden, altref, current.
std::copy(state.surface_luma_offset.begin(), state.surface_luma_offset.begin() + 4,
vp9_info.frame_offsets.begin());
@@ -384,33 +386,18 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state)
gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(),
current_frame.info.bitstream_size);
}
- // Buffer two frames, saving the last show frame info
- if (!next_next_frame.bit_stream.empty()) {
+ if (!next_frame.bit_stream.empty()) {
Vp9FrameContainer temp{
.info = current_frame.info,
.bit_stream = std::move(current_frame.bit_stream),
};
- next_next_frame.info.show_frame = current_frame.info.last_frame_shown;
- current_frame.info = next_next_frame.info;
- current_frame.bit_stream = std::move(next_next_frame.bit_stream);
- next_next_frame = std::move(temp);
-
- if (!next_frame.bit_stream.empty()) {
- Vp9FrameContainer temp2{
- .info = current_frame.info,
- .bit_stream = std::move(current_frame.bit_stream),
- };
- next_frame.info.show_frame = current_frame.info.last_frame_shown;
- current_frame.info = next_frame.info;
- current_frame.bit_stream = std::move(next_frame.bit_stream);
- next_frame = std::move(temp2);
- } else {
- next_frame.info = current_frame.info;
- next_frame.bit_stream = std::move(current_frame.bit_stream);
- }
+ next_frame.info.show_frame = current_frame.info.last_frame_shown;
+ current_frame.info = next_frame.info;
+ current_frame.bit_stream = std::move(next_frame.bit_stream);
+ next_frame = std::move(temp);
} else {
- next_next_frame.info = current_frame.info;
- next_next_frame.bit_stream = std::move(current_frame.bit_stream);
+ next_frame.info = current_frame.info;
+ next_frame.bit_stream = std::move(current_frame.bit_stream);
}
return current_frame;
}
@@ -613,86 +600,64 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
// Reset context
prev_frame_probs = default_probs;
- swap_next_golden = false;
+ swap_ref_indices = false;
loop_filter_ref_deltas.fill(0);
loop_filter_mode_deltas.fill(0);
-
- // allow frames offsets to stabilize before checking for golden frames
- grace_period = 4;
-
- // On key frames, all frame slots are set to the current frame,
- // so the value of the selected slot doesn't really matter.
- frame_ctxs.fill({current_frame_number, false, default_probs});
+ frame_ctxs.fill(default_probs);
// intra only, meaning the frame can be recreated with no other references
current_frame_info.intra_only = true;
-
} else {
-
if (!current_frame_info.show_frame) {
uncomp_writer.WriteBit(current_frame_info.intra_only);
- if (!current_frame_info.last_frame_was_key) {
- swap_next_golden = !swap_next_golden;
- }
} else {
current_frame_info.intra_only = false;
}
if (!current_frame_info.error_resilient_mode) {
uncomp_writer.WriteU(0, 2); // Reset frame context.
}
-
- // Last, Golden, Altref frames
- std::array<s32, 3> ref_frame_index{0, 1, 2};
-
- // Set when next frame is hidden
- // altref and golden references are swapped
- if (swap_next_golden) {
- ref_frame_index = std::array<s32, 3>{0, 2, 1};
+ const auto& curr_offsets = current_frame_info.frame_offsets;
+ const auto& next_offsets = next_frame.info.frame_offsets;
+ const bool ref_frames_different = curr_offsets[1] != curr_offsets[2];
+ const bool next_references_swap =
+ (next_offsets[1] == curr_offsets[2]) || (next_offsets[2] == curr_offsets[1]);
+ const bool needs_ref_swap = ref_frames_different && next_references_swap;
+ if (needs_ref_swap) {
+ swap_ref_indices = !swap_ref_indices;
}
-
- // update Last Frame
- u64 refresh_frame_flags = 1;
-
- // golden frame may refresh, determined if the next golden frame offset is changed
- bool golden_refresh = false;
- if (grace_period <= 0) {
- for (s32 index = 1; index < 3; ++index) {
- if (current_frame_info.frame_offsets[index] !=
- next_frame.info.frame_offsets[index]) {
- current_frame_info.refresh_frame[index] = true;
- golden_refresh = true;
- grace_period = 3;
- }
+ union {
+ u32 raw;
+ BitField<0, 1, u32> refresh_last;
+ BitField<1, 2, u32> refresh_golden;
+ BitField<2, 1, u32> refresh_alt;
+ } refresh_frame_flags;
+
+ refresh_frame_flags.raw = 0;
+ for (u32 index = 0; index < 3; ++index) {
+ // Refresh indices that use the current frame as an index
+ if (curr_offsets[3] == next_offsets[index]) {
+ refresh_frame_flags.raw |= 1u << index;
}
}
-
- if (current_frame_info.show_frame &&
- (!next_frame.info.show_frame || next_frame.info.is_key_frame)) {
- // Update golden frame
- refresh_frame_flags = swap_next_golden ? 2 : 4;
- }
-
- if (!current_frame_info.show_frame) {
- // Update altref
- refresh_frame_flags = swap_next_golden ? 2 : 4;
- } else if (golden_refresh) {
- refresh_frame_flags = 3;
+ if (swap_ref_indices) {
+ const u32 temp = refresh_frame_flags.refresh_golden;
+ refresh_frame_flags.refresh_golden.Assign(refresh_frame_flags.refresh_alt.Value());
+ refresh_frame_flags.refresh_alt.Assign(temp);
}
-
if (current_frame_info.intra_only) {
uncomp_writer.WriteU(frame_sync_code, 24);
- uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
+ uncomp_writer.WriteU(refresh_frame_flags.raw, 8);
uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
uncomp_writer.WriteBit(false); // Render and frame size different.
} else {
- uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
-
- for (s32 index = 1; index < 4; index++) {
+ const bool swap_indices = needs_ref_swap ^ swap_ref_indices;
+ const auto ref_frame_index = swap_indices ? std::array{0, 2, 1} : std::array{0, 1, 2};
+ uncomp_writer.WriteU(refresh_frame_flags.raw, 8);
+ for (size_t index = 1; index < 4; index++) {
uncomp_writer.WriteU(ref_frame_index[index - 1], 3);
uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1);
}
-
uncomp_writer.WriteBit(true); // Frame size with refs.
uncomp_writer.WriteBit(false); // Render and frame size different.
uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv);
@@ -714,10 +679,9 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
frame_ctx_idx = 1;
}
- uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
- prev_frame_probs =
- frame_ctxs[frame_ctx_idx].probs; // reference probabilities for compressed header
- frame_ctxs[frame_ctx_idx] = {current_frame_number, false, current_frame_info.entropy};
+ uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
+ prev_frame_probs = frame_ctxs[frame_ctx_idx]; // reference probabilities for compressed header
+ frame_ctxs[frame_ctx_idx] = current_frame_info.entropy;
uncomp_writer.WriteU(current_frame_info.first_level, 6);
uncomp_writer.WriteU(current_frame_info.sharpness_level, 3);
@@ -812,7 +776,6 @@ const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters
current_frame_info = curr_frame.info;
bitstream = std::move(curr_frame.bit_stream);
}
-
// The uncompressed header routine sets PrevProb parameters needed for the compressed header
auto uncomp_writer = ComposeUncompressedHeader();
std::vector<u8> compressed_header = ComposeCompressedHeader();
@@ -828,13 +791,6 @@ const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters
frame.begin() + uncompressed_header.size());
std::copy(bitstream.begin(), bitstream.end(),
frame.begin() + uncompressed_header.size() + compressed_header.size());
-
- // keep track of frame number
- current_frame_number++;
- grace_period--;
-
- // don't display hidden frames
- hidden = !current_frame_info.show_frame;
return frame;
}
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h
index 8396c8105..e6e9fc17e 100644
--- a/src/video_core/command_classes/codecs/vp9.h
+++ b/src/video_core/command_classes/codecs/vp9.h
@@ -14,7 +14,6 @@
namespace Tegra {
class GPU;
-enum class FrameType { KeyFrame = 0, InterFrame = 1 };
namespace Decoder {
/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the
@@ -124,7 +123,7 @@ public:
/// Returns true if the most recent frame was a hidden frame.
[[nodiscard]] bool WasFrameHidden() const {
- return hidden;
+ return !current_frame_info.show_frame;
}
private:
@@ -178,19 +177,12 @@ private:
std::array<s8, 4> loop_filter_ref_deltas{};
std::array<s8, 2> loop_filter_mode_deltas{};
- bool hidden = false;
- s64 current_frame_number = -2; // since we buffer 2 frames
- s32 grace_period = 6; // frame offsets need to stabilize
- std::array<FrameContexts, 4> frame_ctxs{};
Vp9FrameContainer next_frame{};
- Vp9FrameContainer next_next_frame{};
- bool swap_next_golden{};
+ std::array<Vp9EntropyProbs, 4> frame_ctxs{};
+ bool swap_ref_indices{};
Vp9PictureInfo current_frame_info{};
Vp9EntropyProbs prev_frame_probs{};
-
- s32 diff_update_probability = 252;
- s32 frame_sync_code = 0x498342;
};
} // namespace Decoder
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
index 2da14f3ca..6820afa26 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -296,12 +296,6 @@ struct RefPoolElement {
bool refresh{};
};
-struct FrameContexts {
- s64 from;
- bool adapted;
- Vp9EntropyProbs probs;
-};
-
#define ASSERT_POSITION(field_name, position) \
static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \
"Field " #field_name " has invalid position")