encoder: Add support for true hardware encoding

This is the last step towards truly efficient encoding on AMD, Nvidia and Intel GPUs. With this we have no software overhead and can directly encode the content that OBS gives us, without going through any intermediate CPU layer. This is effectively what @jp9000 did for the OBS-integrated nvenc, but thanks to FFmpeg it works on all encoders that support D3D11VA acceleration.

With this change, encoding should now work flawlessly even in very constrained situations (unless OBS itself is being starved of resources). In particular, people streaming and recording Ubisoft games will likely see a drastic increase in encoding capability and, thanks to the new options, will also be able to get a much higher quality stream and recording with the same hardware.
This commit is contained in:
Michael Fabian 'Xaymar' Dirks
2019-09-29 19:29:00 +02:00
parent 0461b20e1b
commit 545dcd6d50
2 changed files with 255 additions and 162 deletions
+242 -160
View File
@@ -52,6 +52,11 @@ extern "C" {
// - encode_texture/encode // - encode_texture/encode
// I don't understand what get_video_info is actually for in this order, as this postpones initialization to encode... // I don't understand what get_video_info is actually for in this order, as this postpones initialization to encode...
#ifdef WIN32
#define HARDWARE_ENCODING
#include "hwapi/d3d11.hpp"
#endif
// FFmpeg // FFmpeg
#define ST_FFMPEG "FFmpeg" #define ST_FFMPEG "FFmpeg"
#define ST_FFMPEG_CUSTOMSETTINGS "FFmpeg.CustomSettings" #define ST_FFMPEG_CUSTOMSETTINGS "FFmpeg.CustomSettings"
@@ -328,6 +333,7 @@ obsffmpeg::encoder_factory::encoder_factory(const AVCodec* codec) : avcodec_ptr(
#endif #endif
// Hardware encoder? // Hardware encoder?
#ifdef HARDWARE_ENCODING
if (ffmpeg::tools::can_hardware_encode(avcodec_ptr)) { if (ffmpeg::tools::can_hardware_encode(avcodec_ptr)) {
info_fallback.uid = info.uid + "_sw"; info_fallback.uid = info.uid + "_sw";
info_fallback.codec = info.codec; info_fallback.codec = info.codec;
@@ -341,6 +347,7 @@ obsffmpeg::encoder_factory::encoder_factory(const AVCodec* codec) : avcodec_ptr(
info.oei.caps |= OBS_ENCODER_CAP_PASS_TEXTURE; info.oei.caps |= OBS_ENCODER_CAP_PASS_TEXTURE;
} }
#endif
} }
obsffmpeg::encoder_factory::~encoder_factory() {} obsffmpeg::encoder_factory::~encoder_factory() {}
@@ -541,59 +548,8 @@ const obsffmpeg::encoder_info& obsffmpeg::encoder_factory::get_fallback()
return info_fallback; return info_fallback;
} }
obsffmpeg::encoder::encoder(obs_data_t* settings, obs_encoder_t* encoder, bool is_texture_encode) void obsffmpeg::encoder::initialize_sw(obs_data_t* settings)
: _self(encoder), _lag_in_frames(0), _count_send_frames(0), _have_first_frame(false)
{ {
if (is_texture_encode) {
throw obsffmpeg::unsupported_gpu_exception("not implemented yet");
}
_factory = reinterpret_cast<encoder_factory*>(obs_encoder_get_type_data(_self));
// Verify that the codec actually still exists.
_codec = avcodec_find_encoder_by_name(_factory->get_avcodec()->name);
if (!_codec) {
PLOG_ERROR("Failed to find encoder for codec '%s'.", _factory->get_avcodec()->name);
throw std::runtime_error("failed to find codec");
}
// Find Codec UI handler.
_handler = obsffmpeg::find_codec_handler(_codec->name);
// Initialize context.
_context = avcodec_alloc_context3(_codec);
if (!_context) {
PLOG_ERROR("Failed to create context for encoder '%s'.", _codec->name);
throw std::runtime_error("failed to create context");
}
// Settings
/// Rate Control
_context->strict_std_compliance = static_cast<int>(obs_data_get_int(settings, ST_FFMPEG_STANDARDCOMPLIANCE));
_context->debug = 0;
/// Threading
if (_codec->capabilities
& (AV_CODEC_CAP_AUTO_THREADS | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS)) {
if (_codec->capabilities & AV_CODEC_CAP_FRAME_THREADS) {
_context->thread_type |= FF_THREAD_FRAME;
}
if (_codec->capabilities & AV_CODEC_CAP_SLICE_THREADS) {
_context->thread_type |= FF_THREAD_SLICE;
}
int64_t threads = obs_data_get_int(settings, ST_FFMPEG_THREADS);
if (threads > 0) {
_context->thread_count = static_cast<int>(threads);
_lag_in_frames = _context->thread_count;
} else {
_context->thread_count = std::thread::hardware_concurrency();
_lag_in_frames = _context->thread_count;
}
}
// Create 8MB of precached Packet data for use later on.
av_init_packet(&_current_packet);
av_new_packet(&_current_packet, 8 * 1024 * 1024); // 8 MB precached Packet size.
if (_codec->type == AVMEDIA_TYPE_VIDEO) { if (_codec->type == AVMEDIA_TYPE_VIDEO) {
// Initialize Video Encoding // Initialize Video Encoding
auto voi = video_output_get_info(obs_encoder_video(_self)); auto voi = video_output_get_info(obs_encoder_video(_self));
@@ -656,28 +612,106 @@ obsffmpeg::encoder::encoder(obs_data_t* settings, obs_encoder_t* encoder, bool i
<< (_swscale.is_source_full_range() ? "full" : "partial") << " range."; << (_swscale.is_source_full_range() ? "full" : "partial") << " range.";
throw std::runtime_error(sstr.str()); throw std::runtime_error(sstr.str());
} }
// Create Frame queue
_frame_queue.set_pixel_format(_context->pix_fmt);
_frame_queue.set_resolution(_context->width, _context->height);
_frame_queue.precache(2);
}
}
/**
 * Set up the codec context for hardware (texture) encoding.
 *
 * Copies resolution, frame rate and color information from the OBS video
 * output into `_context`, then attaches a hardware device context obtained
 * from `_hwinst` and a matching, initialized hardware frames context.
 *
 * @param settings Encoder settings; not read by this function at present.
 * @throws std::runtime_error if the device context is missing, or if the
 *         frames context cannot be allocated or initialized.
 */
void obsffmpeg::encoder::initialize_hw(obs_data_t* settings)
{
	// Initialize Video Encoding
	auto voi = video_output_get_info(obs_encoder_video(_self));

	_context->width                   = voi->width;
	_context->height                  = voi->height;
	_context->field_order             = AV_FIELD_PROGRESSIVE;
	_context->ticks_per_frame         = 1;
	_context->sample_aspect_ratio.num = _context->sample_aspect_ratio.den = 1;
	// time_base is the inverse of the frame rate, hence the crossed assignment.
	_context->framerate.num = _context->time_base.den = voi->fps_num;
	_context->framerate.den = _context->time_base.num = voi->fps_den;
	ffmpeg::tools::setup_obs_color(voi->colorspace, voi->range, _context);

	// sw_pix_fmt describes the layout of the data inside the hardware surface.
	_context->sw_pix_fmt = ffmpeg::tools::obs_videoformat_to_avpixelformat(voi->format);
#ifdef WIN32
	_context->pix_fmt = AV_PIX_FMT_D3D11;
#endif

	_context->hw_device_ctx = _hwinst->create_device_context();
	// av_hwframe_ctx_alloc() needs a valid device reference; fail cleanly
	// instead of handing it a null pointer.
	if (!_context->hw_device_ctx)
		throw std::runtime_error("Failed to create AVHWDeviceContext.");

	_context->hw_frames_ctx = av_hwframe_ctx_alloc(_context->hw_device_ctx);
	if (!_context->hw_frames_ctx)
		throw std::runtime_error("Failed to allocate AVHWFramesContext.");

	// The frames context must mirror the codec context before it is initialized.
	AVHWFramesContext* frames = reinterpret_cast<AVHWFramesContext*>(_context->hw_frames_ctx->data);
	frames->width             = _context->width;
	frames->height            = _context->height;
	frames->format            = _context->pix_fmt;
	frames->sw_format         = _context->sw_pix_fmt;

	if (av_hwframe_ctx_init(_context->hw_frames_ctx) < 0)
		throw std::runtime_error("Failed to initialize AVHWFramesContext.");
}
obsffmpeg::encoder::encoder(obs_data_t* settings, obs_encoder_t* encoder, bool is_texture_encode)
: _self(encoder), _lag_in_frames(0), _count_send_frames(0), _have_first_frame(false)
{
// Initial set up.
_factory = reinterpret_cast<encoder_factory*>(obs_encoder_get_type_data(_self));
_codec = _factory->get_avcodec();
_handler = obsffmpeg::find_codec_handler(_codec->name);
if (is_texture_encode) {
#ifdef WIN32
_hwapi = std::make_shared<obsffmpeg::hwapi::d3d11>();
#endif
obsffmpeg::hwapi::device dev;
if (_handler)
dev = _handler->find_hw_device(_hwapi, _codec, _context);
try {
_hwinst = _hwapi->create(dev);
} catch (...) {
throw obsffmpeg::unsupported_gpu_exception("Creating GPU context failed.");
}
} }
{ // Log Encoder info // Initialize context.
PLOG_INFO("[%s] Initializing...", _codec->name); _context = avcodec_alloc_context3(_codec);
PLOG_INFO("[%s] Video Input: %ldx%ld %s %s %s", _codec->name, _swscale.get_source_width(), if (!_context) {
_swscale.get_source_height(), PLOG_ERROR("Failed to create context for encoder '%s'.", _codec->name);
ffmpeg::tools::get_pixel_format_name(_swscale.get_source_format()), throw std::runtime_error("failed to create context");
ffmpeg::tools::get_color_space_name(_swscale.get_source_colorspace()),
_swscale.is_source_full_range() ? "Full" : "Partial");
PLOG_INFO("[%s] Video Output: %ldx%ld %s %s %s", _codec->name, _swscale.get_target_width(),
_swscale.get_target_height(),
ffmpeg::tools::get_pixel_format_name(_swscale.get_target_format()),
ffmpeg::tools::get_color_space_name(_swscale.get_target_colorspace()),
_swscale.is_target_full_range() ? "Full" : "Partial");
PLOG_INFO("[%s] Framerate: %ld/%ld (%f FPS)", _codec->name, _context->time_base.den,
_context->time_base.num,
static_cast<double_t>(_context->time_base.den)
/ static_cast<double_t>(_context->time_base.num));
PLOG_INFO("[%s] Custom Settings: %s", _codec->name,
obs_data_get_string(settings, ST_FFMPEG_CUSTOMSETTINGS));
} }
// Create 8MB of precached Packet data for use later on.
av_init_packet(&_current_packet);
av_new_packet(&_current_packet, 8 * 1024 * 1024); // 8 MB precached Packet size.
if (!is_texture_encode) {
initialize_sw(settings);
} else {
try {
initialize_hw(settings);
} catch (...) {
throw obsffmpeg::unsupported_gpu_exception("Initializing hardware context failed.");
}
}
// Log Encoder info
PLOG_INFO("[%s] Initializing...", _codec->name);
PLOG_INFO("[%s] Video Input: %ldx%ld %s %s %s", _codec->name, _swscale.get_source_width(),
_swscale.get_source_height(), ffmpeg::tools::get_pixel_format_name(_swscale.get_source_format()),
ffmpeg::tools::get_color_space_name(_swscale.get_source_colorspace()),
_swscale.is_source_full_range() ? "Full" : "Partial");
PLOG_INFO("[%s] Video Output: %ldx%ld %s %s %s", _codec->name, _swscale.get_target_width(),
_swscale.get_target_height(), ffmpeg::tools::get_pixel_format_name(_swscale.get_target_format()),
ffmpeg::tools::get_color_space_name(_swscale.get_target_colorspace()),
_swscale.is_target_full_range() ? "Full" : "Partial");
PLOG_INFO("[%s] Framerate: %ld/%ld (%f FPS)", _codec->name, _context->time_base.den, _context->time_base.num,
static_cast<double_t>(_context->time_base.den) / static_cast<double_t>(_context->time_base.num));
PLOG_INFO("[%s] Custom Settings: %s", _codec->name, obs_data_get_string(settings, ST_FFMPEG_CUSTOMSETTINGS));
// Update settings // Update settings
update(settings); update(settings);
@@ -689,11 +723,6 @@ obsffmpeg::encoder::encoder(obs_data_t* settings, obs_encoder_t* encoder, bool i
<< "' failed with error: " << ffmpeg::tools::get_error_description(res) << " (code " << res << ")"; << "' failed with error: " << ffmpeg::tools::get_error_description(res) << " (code " << res << ")";
throw std::runtime_error(sstr.str()); throw std::runtime_error(sstr.str());
} }
// Create Frame queue
_frame_queue.set_pixel_format(_context->pix_fmt);
_frame_queue.set_resolution(_context->width, _context->height);
_frame_queue.precache(2);
} }
obsffmpeg::encoder::~encoder() obsffmpeg::encoder::~encoder()
@@ -737,6 +766,29 @@ void obsffmpeg::encoder::get_properties(obs_properties_t* props)
bool obsffmpeg::encoder::update(obs_data_t* settings) bool obsffmpeg::encoder::update(obs_data_t* settings)
{ {
// Settings
/// Rate Control
_context->strict_std_compliance = static_cast<int>(obs_data_get_int(settings, ST_FFMPEG_STANDARDCOMPLIANCE));
_context->debug = 0;
/// Threading
if (_codec->capabilities
& (AV_CODEC_CAP_AUTO_THREADS | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS)) {
if (_codec->capabilities & AV_CODEC_CAP_FRAME_THREADS) {
_context->thread_type |= FF_THREAD_FRAME;
}
if (_codec->capabilities & AV_CODEC_CAP_SLICE_THREADS) {
_context->thread_type |= FF_THREAD_SLICE;
}
int64_t threads = obs_data_get_int(settings, ST_FFMPEG_THREADS);
if (threads > 0) {
_context->thread_count = static_cast<int>(threads);
_lag_in_frames = _context->thread_count;
} else {
_context->thread_count = std::thread::hardware_concurrency();
_lag_in_frames = _context->thread_count;
}
}
if (_handler) if (_handler)
_handler->update(settings, _codec, _context); _handler->update(settings, _codec, _context);
@@ -874,88 +926,8 @@ bool obsffmpeg::encoder::video_encode(encoder_frame* frame, encoder_packet* pack
} }
} }
// Send and receive frames. if (!encode_avframe(vframe, packet, received_packet))
{ return false;
#ifdef _DEBUG
ScopeProfiler profile("loop");
#endif
bool sent_frame = false;
bool recv_packet = false;
bool should_lag = (_lag_in_frames - _count_send_frames) <= 0;
auto loop_begin = std::chrono::high_resolution_clock::now();
auto loop_end = loop_begin + std::chrono::milliseconds(50);
while ((!sent_frame || (should_lag && !recv_packet))
&& !(std::chrono::high_resolution_clock::now() > loop_end)) {
bool eagain_is_stupid = false;
if (!sent_frame) {
#ifdef _DEBUG
ScopeProfiler profile_inner("send");
#endif
int res = send_frame(vframe);
switch (res) {
case 0:
sent_frame = true;
vframe = nullptr;
break;
case AVERROR(EAGAIN):
// This means we should call receive_packet again, but what do we do with that data?
// Why can't we queue on both? Do I really have to implement threading for this stuff?
if (*received_packet == true) {
PLOG_WARNING(
"Skipped frame due to EAGAIN when a packet was already returned.");
sent_frame = true;
}
eagain_is_stupid = true;
break;
case AVERROR(EOF):
PLOG_ERROR("Skipped frame due to end of stream.");
sent_frame = true;
break;
default:
PLOG_ERROR("Failed to encode frame: %s (%ld).",
ffmpeg::tools::get_error_description(res), res);
return false;
}
}
if (!recv_packet) {
#ifdef _DEBUG
ScopeProfiler profile_inner("recieve");
#endif
int res = receive_packet(received_packet, packet);
switch (res) {
case 0:
recv_packet = true;
break;
case AVERROR(EOF):
PLOG_ERROR("Received end of file.");
recv_packet = true;
break;
case AVERROR(EAGAIN):
if (sent_frame) {
recv_packet = true;
}
if (eagain_is_stupid) {
PLOG_ERROR("Both send and recieve returned EAGAIN, encoder is broken.");
return false;
}
break;
default:
PLOG_ERROR("Failed to receive packet: %s (%ld).",
ffmpeg::tools::get_error_description(res), res);
return false;
}
}
if (!sent_frame || !recv_packet) {
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
}
}
if (vframe != nullptr) { if (vframe != nullptr) {
_frame_queue.push(vframe); _frame_queue.push(vframe);
@@ -964,9 +936,34 @@ bool obsffmpeg::encoder::video_encode(encoder_frame* frame, encoder_packet* pack
return true; return true;
} }
bool obsffmpeg::encoder::video_encode_texture(uint32_t, int64_t, uint64_t, uint64_t*, encoder_packet*, bool*) bool obsffmpeg::encoder::video_encode_texture(uint32_t handle, int64_t pts, uint64_t lock_key, uint64_t* next_lock_key,
encoder_packet* packet, bool* received_packet)
{ {
return false; if (handle == GS_INVALID_HANDLE) {
PLOG_ERROR("Received invalid handle.");
*next_lock_key = lock_key;
return false;
}
std::shared_ptr<AVFrame> frame{av_frame_alloc(), [](void* ptr) {
av_frame_unref(reinterpret_cast<AVFrame*>(ptr));
av_frame_free(reinterpret_cast<AVFrame**>(&ptr));
}};
std::shared_ptr<AVFrame> vframe = _hwinst->avframe_from_obs(_context->hw_frames_ctx, handle, lock_key, next_lock_key);
vframe->color_range = _context->color_range;
vframe->colorspace = _context->colorspace;
vframe->color_primaries = _context->color_primaries;
vframe->color_trc = _context->color_trc;
vframe->pts = pts;
if (!encode_avframe(vframe, packet, received_packet))
return false;
*next_lock_key = lock_key;
return true;
} }
int obsffmpeg::encoder::receive_packet(bool* received_packet, struct encoder_packet* packet) int obsffmpeg::encoder::receive_packet(bool* received_packet, struct encoder_packet* packet)
@@ -1039,7 +1036,8 @@ int obsffmpeg::encoder::send_frame(std::shared_ptr<AVFrame> const frame)
int res = avcodec_send_frame(_context, frame.get()); int res = avcodec_send_frame(_context, frame.get());
switch (res) { switch (res) {
case 0: case 0:
_frame_queue_used.push(frame); if (!_hwapi)
_frame_queue_used.push(frame);
_count_send_frames++; _count_send_frames++;
case AVERROR(EAGAIN): case AVERROR(EAGAIN):
case AVERROR(EOF): case AVERROR(EOF):
@@ -1047,3 +1045,87 @@ int obsffmpeg::encoder::send_frame(std::shared_ptr<AVFrame> const frame)
} }
return res; return res;
} }
/**
 * Submit one frame to the encoder and try to fetch one packet.
 *
 * Alternates between send_frame() and receive_packet() until the frame has
 * been handed over (or skipped) and, once at least `_lag_in_frames` frames
 * have been sent, until a packet attempt has completed. The loop gives up
 * after 50 ms.
 *
 * @param frame           Frame to encode. Reset to nullptr once the encoder
 *                        accepted it; left untouched otherwise.
 * @param packet          Output packet, filled in by receive_packet().
 * @param received_packet Set by receive_packet() when a packet was produced.
 * @return false on an unrecoverable send/receive error or when both calls
 *         report EAGAIN in the same iteration; true otherwise, including
 *         when the 50 ms deadline expires.
 */
bool obsffmpeg::encoder::encode_avframe(std::shared_ptr<AVFrame>& frame, encoder_packet* packet, bool* received_packet)
{
#ifdef _DEBUG
	ScopeProfiler profile("loop");
#endif

	bool sent_frame  = false;
	bool recv_packet = false;
	// Only insist on a packet once the encoder has been fed enough frames.
	bool should_lag = (_lag_in_frames - _count_send_frames) <= 0;

	// A deadline must come from a monotonic clock so that wall-clock
	// adjustments can neither shorten nor extend the 50 ms budget.
	const auto loop_end = std::chrono::steady_clock::now() + std::chrono::milliseconds(50);

	while ((!sent_frame || (should_lag && !recv_packet)) && !(std::chrono::steady_clock::now() > loop_end)) {
		bool eagain_is_stupid = false;

		if (!sent_frame) {
#ifdef _DEBUG
			ScopeProfiler profile_inner("send");
#endif
			int res = send_frame(frame);
			switch (res) {
			case 0:
				sent_frame = true;
				frame      = nullptr;
				break;
			case AVERROR(EAGAIN):
				// This means we should call receive_packet again, but what do we do with that data?
				// Why can't we queue on both? Do I really have to implement threading for this stuff?
				if (*received_packet == true) {
					PLOG_WARNING("Skipped frame due to EAGAIN when a packet was already returned.");
					sent_frame = true;
				}
				eagain_is_stupid = true;
				break;
			case AVERROR(EOF):
				PLOG_ERROR("Skipped frame due to end of stream.");
				sent_frame = true;
				break;
			default:
				// res is an int; widen it explicitly to match the %ld conversion.
				PLOG_ERROR("Failed to encode frame: %s (%ld).", ffmpeg::tools::get_error_description(res),
				           static_cast<long>(res));
				return false;
			}
		}

		if (!recv_packet) {
#ifdef _DEBUG
			ScopeProfiler profile_inner("recieve");
#endif
			int res = receive_packet(received_packet, packet);
			switch (res) {
			case 0:
				recv_packet = true;
				break;
			case AVERROR(EOF):
				PLOG_ERROR("Received end of file.");
				recv_packet = true;
				break;
			case AVERROR(EAGAIN):
				// No packet yet; that is fine as long as the frame went in.
				if (sent_frame) {
					recv_packet = true;
				}
				if (eagain_is_stupid) {
					PLOG_ERROR("Both send and recieve returned EAGAIN, encoder is broken.");
					return false;
				}
				break;
			default:
				// res is an int; widen it explicitly to match the %ld conversion.
				PLOG_ERROR("Failed to receive packet: %s (%ld).", ffmpeg::tools::get_error_description(res),
				           static_cast<long>(res));
				return false;
			}
		}

		// Back off briefly before retrying whichever half is still pending.
		if (!sent_frame || !recv_packet) {
			std::this_thread::sleep_for(std::chrono::milliseconds(1));
		}
	}

	return true;
}
+13 -2
View File
@@ -27,6 +27,7 @@
#include <vector> #include <vector>
#include "ffmpeg/avframe-queue.hpp" #include "ffmpeg/avframe-queue.hpp"
#include "ffmpeg/swscale.hpp" #include "ffmpeg/swscale.hpp"
#include "hwapi/base.hpp"
#include "ui/handler.hpp" #include "ui/handler.hpp"
extern "C" { extern "C" {
@@ -80,11 +81,15 @@ namespace obsffmpeg {
obs_encoder_t* _self; obs_encoder_t* _self;
encoder_factory* _factory; encoder_factory* _factory;
const AVCodec* _codec; const AVCodec* _codec;
AVCodecContext* _context; AVCodecContext* _context;
AVHWFramesContext* _hwcontext;
std::shared_ptr<obsffmpeg::ui::handler> _handler; std::shared_ptr<obsffmpeg::ui::handler> _handler;
std::shared_ptr<obsffmpeg::hwapi::base> _hwapi;
std::shared_ptr<obsffmpeg::hwapi::instance> _hwinst;
ffmpeg::avframe_queue _frame_queue; ffmpeg::avframe_queue _frame_queue;
ffmpeg::avframe_queue _frame_queue_used; ffmpeg::avframe_queue _frame_queue_used;
ffmpeg::swscale _swscale; ffmpeg::swscale _swscale;
@@ -98,6 +103,9 @@ namespace obsffmpeg {
std::vector<uint8_t> _extra_data; std::vector<uint8_t> _extra_data;
std::vector<uint8_t> _sei_data; std::vector<uint8_t> _sei_data;
void initialize_sw(obs_data_t* settings);
void initialize_hw(obs_data_t* settings);
public: public:
encoder(obs_data_t* settings, obs_encoder_t* encoder, bool is_texture_encode = false); encoder(obs_data_t* settings, obs_encoder_t* encoder, bool is_texture_encode = false);
virtual ~encoder(); virtual ~encoder();
@@ -130,5 +138,8 @@ namespace obsffmpeg {
int receive_packet(bool* received_packet, struct encoder_packet* packet); int receive_packet(bool* received_packet, struct encoder_packet* packet);
int send_frame(std::shared_ptr<AVFrame> frame); int send_frame(std::shared_ptr<AVFrame> frame);
bool encode_avframe(std::shared_ptr<AVFrame>& frame, struct encoder_packet* packet,
bool* received_packet);
}; };
} // namespace obsffmpeg } // namespace obsffmpeg